Mande valency

Last update: 2024-01-25

Для всех пар языков берётся одно наблюдение на каждый стимул. Тепловая карта и кластеризация ниже построены на одном и том же расстоянии:

Code
# Denominator: for every pair of languages, the number of stimuli (restricted
# to `to_keep`) for which both languages have a usable row, i.e. a row that is
# not "no equivalent" and has a construction type.
df |> 
  filter(number %in% to_keep,
         !str_detect(construction, "no equivalent"),
         !is.na(construction_type)) |> 
  arrange(language)  |> 
  distinct(language, number, construction_type) |>  
  pairwise_count(language, number) |> 
  rename(total = n) ->
  df_pairwise_total

# Numerator: for every pair of languages, the number of stimuli on which the
# two languages share at least one construction type. `mutate(n = 1)` makes
# each stimulus count once, however many construction types are shared.
df |> 
  filter(number %in% to_keep,
         !str_detect(construction, "no equivalent"),
         !is.na(construction_type)) |> 
  group_by(number) |> 
  arrange(language) |> 
  pairwise_count(language, construction_type)   |> 
  mutate(n = 1) |> 
  group_by(item1, item2) |> 
  reframe(n = sum(n)) -> 
  df_pairwise_within_construction

# Heatmap of the percentage of agreeing stimuli per language pair.
# The join keys are spelled out so the join does not depend on which columns
# the two tables happen to share, and a pair without any agreeing stimulus
# (absent from the numerator table) is counted as 0 % instead of NA.
# NOTE(review): the data are restricted to `to_keep`, but the title says
# "all stimuli" -- confirm which of the two is intended.
df_pairwise_total |> 
  left_join(df_pairwise_within_construction, by = c("item1", "item2")) |>  
  mutate(percentage = round(coalesce(n, 0)/total*100, 2),
         item1 = case_match(item1, 
                            "Dan_Gweetaa" ~ "Dan Gweetaa",
                            .default = item1),
         item2 = case_match(item2, 
                            "Dan_Gweetaa" ~ "Dan Gweetaa",
                            .default = item2)) |> 
  select(item1, item2, percentage) |> 
  # Fixed factor levels give both axes the same, hand-picked language order.
  mutate(item1 = factor(item1, levels = c("Guro", "Dan Gweetaa", "Mano", "Kpelle", "Kono", "Looma", "Bamana")),
         item2 = factor(item2, levels = c("Guro", "Dan Gweetaa", "Mano", "Kpelle", "Kono", "Looma", "Bamana"))) |> 
  ggplot(aes(item1, item2))+
  geom_tile(aes(fill = percentage), colour = "white") +
  geom_text(aes(label = str_c(percentage, "%")), colour = "white") +
  scale_fill_gradient(low = "lightblue", high = "navy")+
  coord_fixed()+
  labs(x = "", y = "", title = "Heatmap with all stimuli") +
  theme(legend.position = "bottom")

Code
# Distance between two languages = 100 minus the percentage of shared stimuli
# on which they use the same construction type.
df_pairwise_total |> 
  # Explicit keys; a pair with no agreeing stimulus is missing from the
  # numerator table and must count as 0 matches (distance 100), not NA.
  left_join(df_pairwise_within_construction, by = c("item1", "item2")) |>  
  mutate(percentage = 100-round(coalesce(n, 0)/total*100, 2),
         item1 = case_match(item1, 
                            "Dan_Gweetaa" ~ "Dan Gweetaa",
                            .default = item1),
         item2 = case_match(item2, 
                            "Dan_Gweetaa" ~ "Dan Gweetaa",
                            .default = item2)) |> 
  select(item1, item2, percentage) |> 
  # as.dist() reads the lower triangle by position and takes its labels from
  # the column names, so columns must be in the same order as the rows.
  # Rows are sorted by arrange(item1); names_sort = TRUE sorts the columns too
  # instead of leaving them in order of first appearance.
  pivot_wider(names_from = item2, values_from = percentage, names_sort = TRUE) |> 
  arrange(item1) |> 
  select(-item1) |> 
  as.dist() ->
  dist_gold_standard

# Hierarchical clustering of `dist_gold_standard` (hclust() with its default
# linkage), converted to an ape "phylo" object and drawn as a downward tree.
# NOTE(review): `dist_gold_standard` appears to be built from `to_keep`-filtered
# data while the title says "all stimuli" -- confirm.
library("ape")
dist_gold_standard |>   
  hclust() |> 
  as.phylo() %>% 
  # The magrittr pipe is used for this last step because `.` must refer to the
  # phylo object: tip colours are looked up in `colors` (defined outside this
  # chunk) in the order of the tree's tip labels.
  plot(main = "Clusterization with all stimuli",
       tip.color = colors$color[match(.$tip.label, colors$language)],
       direction = "downwards",
       cex = 1.5,
       font = 2)

Code
# NeighborNet network built from the same distances as the tree.
# The title used to say "Heatmap", which mislabelled this network plot.
dist_gold_standard |> 
  neighborNet() |> 
  plot()
title(main = "NeighborNet with all stimuli")

Все примеры, без фильтрации

Code
# Clustering on the wide table without any filtering.
df_wide |>
  # Recode every cell in the language columns to presence/absence:
  # NA becomes 0, anything else becomes 1.
  mutate(across(Guro:Bamana, function(x){ifelse(is.na(x), 0, 1)})) |> 
  select(Guro:Bamana) |> 
  # Transpose so that languages are rows, then take binary distances between them.
  t() |> 
  dist(method = "binary") |> 
  hclust() |> 
  as.phylo() %>%
  # `.` is the phylo object; colours come from `colors`, defined outside this chunk.
  plot(main = "Все примеры, без фильтрации",
       tip.color = colors$color[match(.$tip.label, colors$language)],
       direction = "downwards",
       cex = 1.5,
       font = 2)

Все примеры, без фильтрации, одно наблюдение на стимул

Code
# Shared-stimulus counts per language pair over all stimuli (no `to_keep`
# restriction). Rows marked "no equivalent" or lacking a construction type
# are discarded first.
df_pairwise_total <- df |>
  filter(!(str_detect(construction, "no equivalent") | is.na(construction_type))) |>
  arrange(language) |>
  distinct(language, number, construction_type) |>
  pairwise_count(language, number) |>
  rename(total = n)

Для всех пар языков одно наблюдение на каждый стимул. Стимулов в каждой паре языков разное количество:

Code
# Render the pair counts as a language-by-language table with a fixed column order.
knitr::kable(
  df_pairwise_total |>
    pivot_wider(names_from = item2, values_from = total) |>
    select(item1, Dan_Gweetaa, Guro, Kono, Kpelle, Looma, Mano, Bamana)
)
item1 Dan_Gweetaa Guro Kono Kpelle Looma Mano Bamana
Dan_Gweetaa NA 94 101 105 95 107 108
Guro 94 NA 99 98 96 102 105
Kono 101 99 NA 110 101 113 113
Kpelle 105 98 110 NA 100 115 114
Looma 95 96 101 100 NA 104 106
Mano 107 102 113 115 104 NA 118
Bamana 108 105 113 114 106 118 NA
Code
# Numerator: per language pair, the number of stimuli on which both languages
# share at least one construction type (each stimulus counted once).
df |> 
  filter(!str_detect(construction, "no equivalent"),
         !is.na(construction_type)) |> 
  group_by(number) |> 
  arrange(language) |> 
  pairwise_count(language, construction_type)   |> 
  mutate(n = 1) |> 
  group_by(item1, item2) |> 
  reframe(n = sum(n)) -> 
  df_pairwise_within_construction

# Distance = 100 - percentage of agreeing stimuli; cluster and plot as a tree.
df_pairwise_total |> 
  # Explicit keys; a pair with no agreeing stimulus counts as 0 matches
  # (distance 100) rather than NA, which hclust() cannot handle.
  left_join(df_pairwise_within_construction, by = c("item1", "item2")) |>  
  mutate(percentage = 100-round(coalesce(n, 0)/total*100, 2),
         item1 = case_match(item1, 
                            "Dan_Gweetaa" ~ "Dan Gweetaa",
                            .default = item1),
         item2 = case_match(item2, 
                            "Dan_Gweetaa" ~ "Dan Gweetaa",
                            .default = item2)) |> 
  select(item1, item2, percentage) |> 
  # as.dist() is positional: columns must follow the same order as the rows
  # sorted by arrange(item1), hence names_sort = TRUE.
  pivot_wider(names_from = item2, values_from = percentage, names_sort = TRUE) |> 
  arrange(item1) |> 
  select(-item1) |> 
  as.dist() |>   
  hclust() |> 
  as.phylo() %>% 
  # `.` is the phylo object; colours come from `colors`, defined outside this chunk.
  plot(main = "Clusterization with all stimuli",
       tip.color = colors$color[match(.$tip.label, colors$language)],
       direction = "downwards",
       cex = 1.5,
       font = 2)

Только стимулы, для которых есть все языки, одно наблюдение на стимул

Code
# Shared-stimulus counts per language pair, restricted to the stimuli listed
# in `to_keep`; unusable rows ("no equivalent", missing construction type)
# are removed beforehand.
df_pairwise_total <- df |>
  filter(
    number %in% to_keep,
    !(str_detect(construction, "no equivalent") | is.na(construction_type))
  ) |>
  arrange(language) |>
  distinct(language, number, construction_type) |>
  pairwise_count(language, number) |>
  rename(total = n)

Всего 83 стимула в каждом языке:

Code
# Numerator on the `to_keep` stimuli: per language pair, the number of stimuli
# on which both languages share at least one construction type.
df |> 
  filter(number %in% to_keep,
         !str_detect(construction, "no equivalent"),
         !is.na(construction_type)) |> 
  group_by(number) |> 
  arrange(language) |> 
  pairwise_count(language, construction_type)   |> 
  mutate(n = 1) |> 
  group_by(item1, item2) |> 
  reframe(n = sum(n)) -> 
  df_pairwise_within_construction

# Distance = 100 - percentage of agreeing stimuli; cluster and plot as a tree.
df_pairwise_total |> 
  # Explicit keys; a pair with no agreeing stimulus counts as 0 matches
  # (distance 100) rather than NA, which hclust() cannot handle.
  left_join(df_pairwise_within_construction, by = c("item1", "item2")) |>  
  mutate(percentage = 100-round(coalesce(n, 0)/total*100, 2),
         item1 = case_match(item1, 
                            "Dan_Gweetaa" ~ "Dan Gweetaa",
                            .default = item1),
         item2 = case_match(item2, 
                            "Dan_Gweetaa" ~ "Dan Gweetaa",
                            .default = item2)) |> 
  select(item1, item2, percentage) |> 
  # as.dist() is positional: columns must follow the same order as the rows
  # sorted by arrange(item1), hence names_sort = TRUE.
  pivot_wider(names_from = item2, values_from = percentage, names_sort = TRUE) |> 
  arrange(item1) |> 
  select(-item1) |> 
  as.dist() |>   
  hclust() |> 
  as.phylo() %>% 
  # `.` is the phylo object; colours come from `colors`, defined outside this chunk.
  plot(main = "Clusterization with common stimuli",
       tip.color = colors$color[match(.$tip.label, colors$language)],
       direction = "downwards",
       cex = 1.5,
       font = 2)

Только стимулы, для которых есть все языки, случайная конструкция для каждого языка на стимул

Code
# One random equivalent per (stimulus, language) on the `to_keep` stimuli,
# clustered with average linkage.
# No seed is set in this chunk, so the draw may differ between renders.
df |> 
  filter(!str_detect(construction, "no equivalent"),
         !is.na(construction_type),
         number %in% to_keep) |>
  group_by(number, language) |> 
  slice_sample(n = 1) |> 
  ungroup() |> 
  select(number, language, construction_type) |> 
  # Indicator column: after pivoting, a cell is 1 when the language's sampled
  # row for that stimulus has that construction type, and 0 otherwise.
  mutate(construction_type2 = 1)  |> 
  pivot_wider(names_from = language, values_from = construction_type2, values_fill = 0) |> 
  # NOTE(review): `Bamana:Mano` is a positional column range; it presumably
  # covers all language columns -- verify the column order produced by the pivot.
  select(Bamana:Mano)  |> 
  # Languages become rows; binary distance between their indicator vectors.
  t() |> 
  dist(method = "binary")  |> 
  hclust(method = "average") |> 
  as.phylo()  %>%
  # `.` is the phylo object; colours come from `colors`, defined outside this chunk.
  plot(main = "clusterization with random equivalents and common stimuli",
       tip.color = colors$color[match(.$tip.label, colors$language)],
       direction = "downwards",
       cex = 1.5, 
       font = 2)

Code
# Repeated random draw: one random equivalent per (stimulus, language) on the
# `to_keep` stimuli, binary distance, hclust() with its default linkage.
# No seed is set in this chunk, so the tree may differ between renders.
df |> 
  filter(!str_detect(construction, "no equivalent"),
         !is.na(construction_type),
         number %in% to_keep) |>
  group_by(number, language) |> 
  slice_sample(n = 1) |> 
  ungroup() |> 
  select(number, language, construction_type) |> 
  mutate(construction_type2 = 1)  |> 
  pivot_wider(names_from = language, values_from = construction_type2, values_fill = 0) |> 
  select(Bamana:Mano)  |> 
  t() |> 
  dist(method = "binary") |> 
  hclust() |> 
  as.phylo()  %>%
  plot(main = "clusterization with random equivalents and common stimuli",
       tip.color = colors$color[match(.$tip.label, colors$language)],
       direction = "downwards",
       cex = 1.5, 
       font = 2)

Code
# Repeated random draw: one random equivalent per (stimulus, language) on the
# `to_keep` stimuli, binary distance, hclust() with its default linkage.
# No seed is set in this chunk, so the tree may differ between renders.
df |> 
  filter(!str_detect(construction, "no equivalent"),
         !is.na(construction_type),
         number %in% to_keep) |>
  group_by(number, language) |> 
  slice_sample(n = 1) |> 
  ungroup() |> 
  select(number, language, construction_type) |> 
  mutate(construction_type2 = 1)  |> 
  pivot_wider(names_from = language, values_from = construction_type2, values_fill = 0) |> 
  select(Bamana:Mano)  |> 
  t() |> 
  dist(method = "binary") |> 
  hclust() |> 
  as.phylo()  %>%
  plot(main = "clusterization with random equivalents and common stimuli",
       tip.color = colors$color[match(.$tip.label, colors$language)],
       direction = "downwards",
       cex = 1.5, 
       font = 2)

Code
# Repeated random draw: one random equivalent per (stimulus, language) on the
# `to_keep` stimuli, binary distance, hclust() with its default linkage.
# No seed is set in this chunk, so the tree may differ between renders.
df |> 
  filter(!str_detect(construction, "no equivalent"),
         !is.na(construction_type),
         number %in% to_keep) |>
  group_by(number, language) |> 
  slice_sample(n = 1) |> 
  ungroup() |> 
  select(number, language, construction_type) |> 
  mutate(construction_type2 = 1)  |> 
  pivot_wider(names_from = language, values_from = construction_type2, values_fill = 0) |> 
  select(Bamana:Mano)  |> 
  t() |> 
  dist(method = "binary") |> 
  hclust() |> 
  as.phylo()  %>%
  plot(main = "clusterization with random equivalents and common stimuli",
       tip.color = colors$color[match(.$tip.label, colors$language)],
       direction = "downwards",
       cex = 1.5, 
       font = 2)

Code
# Repeated random draw: one random equivalent per (stimulus, language) on the
# `to_keep` stimuli, binary distance, hclust() with its default linkage.
# No seed is set in this chunk, so the tree may differ between renders.
df |> 
  filter(!str_detect(construction, "no equivalent"),
         !is.na(construction_type),
         number %in% to_keep) |>
  group_by(number, language) |> 
  slice_sample(n = 1) |> 
  ungroup() |> 
  select(number, language, construction_type) |> 
  mutate(construction_type2 = 1)  |> 
  pivot_wider(names_from = language, values_from = construction_type2, values_fill = 0) |> 
  select(Bamana:Mano)  |> 
  t() |> 
  dist(method = "binary") |> 
  hclust() |> 
  as.phylo()  %>%
  plot(main = "clusterization with random equivalents and common stimuli",
       tip.color = colors$color[match(.$tip.label, colors$language)],
       direction = "downwards",
       cex = 1.5, 
       font = 2)

Code
# Repeated random draw: one random equivalent per (stimulus, language) on the
# `to_keep` stimuli, binary distance, hclust() with its default linkage.
# No seed is set in this chunk, so the tree may differ between renders.
df |> 
  filter(!str_detect(construction, "no equivalent"),
         !is.na(construction_type),
         number %in% to_keep) |>
  group_by(number, language) |> 
  slice_sample(n = 1) |> 
  ungroup() |> 
  select(number, language, construction_type) |> 
  mutate(construction_type2 = 1)  |> 
  pivot_wider(names_from = language, values_from = construction_type2, values_fill = 0) |> 
  select(Bamana:Mano)  |> 
  t() |> 
  dist(method = "binary") |> 
  hclust() |> 
  as.phylo()  %>%
  plot(main = "clusterization with random equivalents and common stimuli",
       tip.color = colors$color[match(.$tip.label, colors$language)],
       direction = "downwards",
       cex = 1.5, 
       font = 2)

Code
# Repeated random draw: one random equivalent per (stimulus, language) on the
# `to_keep` stimuli, binary distance, hclust() with its default linkage.
# No seed is set in this chunk, so the tree may differ between renders.
df |> 
  filter(!str_detect(construction, "no equivalent"),
         !is.na(construction_type),
         number %in% to_keep) |>
  group_by(number, language) |> 
  slice_sample(n = 1) |> 
  ungroup() |> 
  select(number, language, construction_type) |> 
  mutate(construction_type2 = 1)  |> 
  pivot_wider(names_from = language, values_from = construction_type2, values_fill = 0) |> 
  select(Bamana:Mano)  |> 
  t() |> 
  dist(method = "binary") |> 
  hclust() |> 
  as.phylo()  %>%
  plot(main = "clusterization with random equivalents and common stimuli",
       tip.color = colors$color[match(.$tip.label, colors$language)],
       direction = "downwards",
       cex = 1.5, 
       font = 2)

Code
# Repeated random draw: one random equivalent per (stimulus, language) on the
# `to_keep` stimuli, binary distance, hclust() with its default linkage.
# No seed is set in this chunk, so the tree may differ between renders.
df |> 
  filter(!str_detect(construction, "no equivalent"),
         !is.na(construction_type),
         number %in% to_keep) |>
  group_by(number, language) |> 
  slice_sample(n = 1) |> 
  ungroup() |> 
  select(number, language, construction_type) |> 
  mutate(construction_type2 = 1)  |> 
  pivot_wider(names_from = language, values_from = construction_type2, values_fill = 0) |> 
  select(Bamana:Mano)  |> 
  t() |> 
  dist(method = "binary") |> 
  hclust() |> 
  as.phylo()  %>%
  plot(main = "clusterization with random equivalents and common stimuli",
       tip.color = colors$color[match(.$tip.label, colors$language)],
       direction = "downwards",
       cex = 1.5, 
       font = 2)

Code
# Repeated random draw: one random equivalent per (stimulus, language) on the
# `to_keep` stimuli, binary distance, hclust() with its default linkage.
# No seed is set in this chunk, so the tree may differ between renders.
df |> 
  filter(!str_detect(construction, "no equivalent"),
         !is.na(construction_type),
         number %in% to_keep) |>
  group_by(number, language) |> 
  slice_sample(n = 1) |> 
  ungroup() |> 
  select(number, language, construction_type) |> 
  mutate(construction_type2 = 1)  |> 
  pivot_wider(names_from = language, values_from = construction_type2, values_fill = 0) |> 
  select(Bamana:Mano)  |> 
  t() |> 
  dist(method = "binary") |> 
  hclust() |> 
  as.phylo()  %>%
  plot(main = "clusterization with random equivalents and common stimuli",
       tip.color = colors$color[match(.$tip.label, colors$language)],
       direction = "downwards",
       cex = 1.5, 
       font = 2)

Code
# Repeated random draw: one random equivalent per (stimulus, language) on the
# `to_keep` stimuli, binary distance, hclust() with its default linkage.
# No seed is set in this chunk, so the tree may differ between renders.
df |> 
  filter(!str_detect(construction, "no equivalent"),
         !is.na(construction_type),
         number %in% to_keep) |>
  group_by(number, language) |> 
  slice_sample(n = 1) |> 
  ungroup() |> 
  select(number, language, construction_type) |> 
  mutate(construction_type2 = 1)  |> 
  pivot_wider(names_from = language, values_from = construction_type2, values_fill = 0) |> 
  select(Bamana:Mano)  |> 
  t() |> 
  dist(method = "binary") |> 
  hclust() |> 
  as.phylo()  %>%
  plot(main = "clusterization with random equivalents and common stimuli",
       tip.color = colors$color[match(.$tip.label, colors$language)],
       direction = "downwards",
       cex = 1.5, 
       font = 2)

Code
# Repeated random draw: one random equivalent per (stimulus, language) on the
# `to_keep` stimuli, binary distance, hclust() with its default linkage.
# No seed is set in this chunk, so the tree may differ between renders.
df |> 
  filter(!str_detect(construction, "no equivalent"),
         !is.na(construction_type),
         number %in% to_keep) |>
  group_by(number, language) |> 
  slice_sample(n = 1) |> 
  ungroup() |> 
  select(number, language, construction_type) |> 
  mutate(construction_type2 = 1)  |> 
  pivot_wider(names_from = language, values_from = construction_type2, values_fill = 0) |> 
  select(Bamana:Mano)  |> 
  t() |> 
  dist(method = "binary") |> 
  hclust() |> 
  as.phylo()  %>%
  plot(main = "clusterization with random equivalents and common stimuli",
       tip.color = colors$color[match(.$tip.label, colors$language)],
       direction = "downwards",
       cex = 1.5, 
       font = 2)

Code
# Repeated random draw: one random equivalent per (stimulus, language) on the
# `to_keep` stimuli, binary distance, hclust() with its default linkage.
# No seed is set in this chunk, so the tree may differ between renders.
df |> 
  filter(!str_detect(construction, "no equivalent"),
         !is.na(construction_type),
         number %in% to_keep) |>
  group_by(number, language) |> 
  slice_sample(n = 1) |> 
  ungroup() |> 
  select(number, language, construction_type) |> 
  mutate(construction_type2 = 1)  |> 
  pivot_wider(names_from = language, values_from = construction_type2, values_fill = 0) |> 
  select(Bamana:Mano)  |> 
  t() |> 
  dist(method = "binary") |> 
  hclust() |> 
  as.phylo()  %>%
  plot(main = "clusterization with random equivalents and common stimuli",
       tip.color = colors$color[match(.$tip.label, colors$language)],
       direction = "downwards",
       cex = 1.5, 
       font = 2)

Code
# Repeated random draw: one random equivalent per (stimulus, language) on the
# `to_keep` stimuli, binary distance, hclust() with its default linkage.
# No seed is set in this chunk, so the tree may differ between renders.
df |> 
  filter(!str_detect(construction, "no equivalent"),
         !is.na(construction_type),
         number %in% to_keep) |>
  group_by(number, language) |> 
  slice_sample(n = 1) |> 
  ungroup() |> 
  select(number, language, construction_type) |> 
  mutate(construction_type2 = 1)  |> 
  pivot_wider(names_from = language, values_from = construction_type2, values_fill = 0) |> 
  select(Bamana:Mano)  |> 
  t() |> 
  dist(method = "binary") |> 
  hclust() |> 
  as.phylo()  %>%
  plot(main = "clusterization with random equivalents and common stimuli",
       tip.color = colors$color[match(.$tip.label, colors$language)],
       direction = "downwards",
       cex = 1.5, 
       font = 2)

Code
# Repeated random draw: one random equivalent per (stimulus, language) on the
# `to_keep` stimuli, binary distance, hclust() with its default linkage.
# No seed is set in this chunk, so the tree may differ between renders.
df |> 
  filter(!str_detect(construction, "no equivalent"),
         !is.na(construction_type),
         number %in% to_keep) |>
  group_by(number, language) |> 
  slice_sample(n = 1) |> 
  ungroup() |> 
  select(number, language, construction_type) |> 
  mutate(construction_type2 = 1)  |> 
  pivot_wider(names_from = language, values_from = construction_type2, values_fill = 0) |> 
  select(Bamana:Mano)  |> 
  t() |> 
  dist(method = "binary") |> 
  hclust() |> 
  as.phylo()  %>%
  plot(main = "clusterization with random equivalents and common stimuli",
       tip.color = colors$color[match(.$tip.label, colors$language)],
       direction = "downwards",
       cex = 1.5, 
       font = 2)

Я вынул из таблицы данные по дан, гуро, коно, кпелле, лоома, мано и сделал столбцы

  • number
  • construction
  • extended_construction
  • postposition
  • language

Видимо, все эти примеры надо поправить. Сколько у нас наблюдений по каждому языку?

Code
# Number of usable observations per language: rows marked "no equivalent" or
# lacking a construction type are excluded before counting.
df |>
  filter(!(str_detect(construction, "no equivalent") | is.na(construction_type))) |>
  count(language)
language n
Bamana 157
Dan_Gweetaa 134
Guro 120
Kono 115
Kpelle 145
Looma 115
Mano 213

А пока давайте посмотрим на общую таблицу:

Code
# Load the merged wide table from the Excel file one directory above the report.
# NOTE(review): presumably one row per construction with one column per language -- confirm.
df_wide <- readxl::read_xlsx("../GM_merged_wide.xlsx")

Можно посчитать, сколько раз пары языков заполняют один и тот же стимул:

Code
# For each pair of languages, count the stimuli that both of them fill in
# (after dropping "no equivalent" rows and rows without a construction type).
df_pairwise_total <- df |>
  filter(!(str_detect(construction, "no equivalent") | is.na(construction_type))) |>
  arrange(language) |>
  distinct(language, number) |>
  pairwise_count(language, number) |>
  rename(total = n)

# Show the counts as a language-by-language table.
df_pairwise_total |>
  pivot_wider(names_from = item2, values_from = total) |>
  arrange(item1)
item1 Bamana Dan_Gweetaa Guro Kono Kpelle Looma Mano
Bamana NA 108 105 113 114 106 118
Dan_Gweetaa 108 NA 94 101 105 95 107
Guro 105 94 NA 99 98 96 102
Kono 113 101 99 NA 110 101 113
Kpelle 114 105 98 110 NA 100 115
Looma 106 95 96 101 100 NA 104
Mano 118 107 102 113 115 104 NA

Можно посчитать, сколько раз пары языков заполняют одну и ту же конструкцию (т. е. совпадают в конструкции, в одном и том же стимуле):

Code
# For each pair of languages, count the stimuli on which the two languages
# share a construction type. Within a stimulus every matching pair is reset
# to 1, so a stimulus contributes at most once per language pair.
df_pairwise_within_construction <- df |>
  filter(!(str_detect(construction, "no equivalent") | is.na(construction_type))) |>
  group_by(number) |>
  arrange(language) |>
  pairwise_count(language, construction_type) |>
  mutate(n = 1) |>
  group_by(item1, item2) |>
  reframe(n = sum(n))

# Show the counts as a language-by-language table with a fixed column order.
df_pairwise_within_construction |>
  pivot_wider(names_from = item2, values_from = n) |>
  arrange(item1) |>
  select(item1, Dan_Gweetaa, Guro, Kono, Kpelle, Looma, Mano, Bamana)
item1 Dan_Gweetaa Guro Kono Kpelle Looma Mano Bamana
Bamana 50 54 47 52 54 54 NA
Dan_Gweetaa NA 61 51 56 56 64 50
Guro 61 NA 52 60 57 65 54
Kono 51 52 NA 89 58 85 47
Kpelle 56 60 89 NA 65 95 52
Looma 56 57 58 65 NA 67 54
Mano 64 65 85 95 67 NA 54

Мне кажется, что чтобы посчитать процент совпадений, о котором просила Маша, нужно разделить последнюю таблицу на предпоследнюю:

Code
# Percentage of agreeing stimuli per language pair:
# stimuli with a shared construction type / stimuli both languages fill in.
df_pairwise_total |> 
  # Explicit join keys; a pair with no agreeing stimulus is absent from the
  # numerator table and is reported as 0 % rather than NA.
  left_join(df_pairwise_within_construction, by = c("item1", "item2")) |> 
  mutate(percentage = round(coalesce(n, 0)/total*100, 3)) |> 
  select(item1, item2, percentage) |> 
  pivot_wider(names_from = item2, values_from = percentage) |> 
  arrange(item1) |> 
  select(item1, Dan_Gweetaa, Guro, Kono, Kpelle, Looma, Mano, Bamana)
item1 Dan_Gweetaa Guro Kono Kpelle Looma Mano Bamana
Bamana 46.296 51.429 41.593 45.614 50.943 45.763 NA
Dan_Gweetaa NA 64.894 50.495 53.333 58.947 59.813 46.296
Guro 64.894 NA 52.525 61.224 59.375 63.725 51.429
Kono 50.495 52.525 NA 80.909 57.426 75.221 41.593
Kpelle 53.333 61.224 80.909 NA 65.000 82.609 45.614
Looma 58.947 59.375 57.426 65.000 NA 64.423 50.943
Mano 59.813 63.725 75.221 82.609 64.423 NA 45.763

Можно построить тепловую карту:

Code
# Heatmap of the percentage of agreeing stimuli per language pair.
df_pairwise_total |> 
  # Explicit join keys; a pair with no agreeing stimulus is absent from the
  # numerator table and is drawn as 0 % rather than as a missing tile.
  left_join(df_pairwise_within_construction, by = c("item1", "item2")) |> 
  mutate(percentage = round(coalesce(n, 0)/total*100, 2),
         # Display label only: replace the underscore in "Dan_Gweetaa".
         item1 = case_match(item1, 
                            "Dan_Gweetaa" ~ "Dan Gweetaa",
                            .default = item1),
         item2 = case_match(item2, 
                            "Dan_Gweetaa" ~ "Dan Gweetaa",
                            .default = item2)) |> 
  select(item1, item2, percentage) |> 
  ggplot(aes(item1, item2))+
  geom_tile(aes(fill = percentage), colour = "white") +
  geom_text(aes(label = str_c(percentage, "%")), colour = "white") +
  scale_fill_gradient(low = "lightblue", high = "navy")+
  coord_fixed()+
  labs(x = "", y = "") +
  theme(legend.position = "bottom")

Теперь можем провести быструю кластеризацию:

Code
# Binary distances between languages over the whole wide table.
df_wide |>
  # Presence/absence recoding of the language columns: NA -> 0, otherwise 1.
  mutate(across(Guro:Bamana, function(x){ifelse(is.na(x), 0, 1)})) |> 
  select(Guro:Bamana) |> 
  # Transpose so that languages are the rows compared by dist().
  t() |> 
  dist(method = "binary")  ->
  all_stimuli

# Tree from hclust() with its default linkage.
all_stimuli |> 
  hclust() |> 
  as.phylo() %>%
  # `.` is the phylo object; colours come from `colors`, defined outside this chunk.
  plot(main = "Все примеры, без фильтрации",
       tip.color = colors$color[match(.$tip.label, colors$language)],
       direction = "downwards",
       cex = 1.5,
       font = 2)

Code
# NeighborNet network for the same distances; title() adds the heading to the
# plot that was just drawn.
all_stimuli |> 
  neighborNet() |> 
  plot()
title(main = "Все примеры, без фильтрации")

Можно еще посмотреть на кластеризацию конструкций, но это не поместится на экран. Однако я считаю, что это было бы интересно Сереже.

Вот что будет, если взять только первые строки для каждого языка:

Code
# Keep only the first row of every (stimulus, language) group and compute
# binary distances between languages.
df |> 
  filter(!str_detect(construction, "no equivalent"),
         !is.na(construction_type)) |>
  group_by(number, language) |> 
  slice(1)  |> 
  ungroup() |> 
  select(number, language, construction_type) |> 
  # Indicator column: after pivoting, a cell is 1 when the language's kept row
  # for that stimulus has that construction type, and 0 otherwise.
  mutate(construction_type2 = 1) |> 
  pivot_wider(names_from = language, values_from = construction_type2, values_fill = 0) |> 
  # NOTE(review): `Bamana:Mano` is a positional column range; it presumably
  # covers all language columns -- verify the column order produced by the pivot.
  select(Bamana:Mano)  |> 
  t() |> 
  dist(method = "binary")  ->
  sample_first_construction

# Tree from hclust() with its default linkage.
sample_first_construction |> 
  hclust() |> 
  as.phylo() %>%
  # `.` is the phylo object; colours come from `colors`, defined outside this chunk.
  plot(main = "Выборка с первыми строками для каждого языка",
       tip.color = colors$color[match(.$tip.label, colors$language)],
       direction = "downwards",
       cex = 1.5,
       font = 2)

Code
# NeighborNet network for the first-row sample; title() labels the drawn plot.
sample_first_construction |> 
  neighborNet() |> 
  plot()
title(main = "Выборка с первыми строками для каждого языка")

МХ: А что будет, если выбрать случайный эквивалент, а не первую строку?

Сегодня (07-10-2023) я вижу следующее:

  • Бамана всегда дальше всех
  • Группа [Mano [Kono Kpelle]] всегда вместе
  • Группа [Dan Gweetaa Guro] всегда вместе
  • Единственная вариация происходит с Looma: он встречается в следующих комбинациях:
    • [Dan Gweetaa [Guro Looma]]
    • [[Dan Gweetaa Guro] Looma]
    • [Looma [Mano [Kono Kpelle]]]
Code
# One random row per (stimulus, language); the seed fixes the draw so the
# result is the same on every render.
set.seed(42)
df |> 
  filter(!str_detect(construction, "no equivalent"),
         !is.na(construction_type)) |>
  group_by(number, language) |> 
  slice_sample(n = 1) |> 
  ungroup() |> 
  select(number, language, construction_type) |> 
  # Indicator column: after pivoting, a cell is 1 when the language's sampled
  # row for that stimulus has that construction type, and 0 otherwise.
  mutate(construction_type2 = 1) |> 
  pivot_wider(names_from = language, values_from = construction_type2, values_fill = 0) |> 
  # NOTE(review): `Bamana:Mano` is a positional column range; it presumably
  # covers all language columns -- verify the column order produced by the pivot.
  select(Bamana:Mano)  |> 
  t() |> 
  dist(method = "binary")  ->
  sample_random_construction

# Tree from hclust() with its default linkage.
sample_random_construction |> 
  hclust() |> 
  as.phylo() %>%
  # `.` is the phylo object; colours come from `colors`, defined outside this chunk.
  plot(main = "Clusterization with random equivalents",
       tip.color = colors$color[match(.$tip.label, colors$language)],
       direction = "downwards",
       cex = 1.5, 
       font = 2)

Code
# NeighborNet network for the random-equivalent sample; title() labels the drawn plot.
sample_random_construction |> 
  neighborNet() |> 
  plot()
title(main = "Выборка со случайным эквивалентом для каждого языка")

Вот что будет, если взять только популярные конструкции:

Code
# Keep, within each stimulus, only the rows whose `n` equals the maximum `n`
# of that stimulus, then compute binary distances between languages.
# NOTE(review): `n` is a column of `df_wide`; presumably the number of
# languages using the construction -- confirm against the spreadsheet.
df_wide |> 
  group_by(number) |> 
  filter(n == max(n))  |> 
  ungroup() |> 
  # Presence/absence recoding of the language columns: NA -> 0, otherwise 1.
  mutate(across(Guro:Bamana, function(x){ifelse(is.na(x), 0, 1)})) |> 
  select(Guro:Bamana) |> 
  t() |> 
  dist(method = "binary")  ->
  sample_popular_construction

# Tree from hclust() with its default linkage.
# The plot title had a typo ("популярынми"); corrected to "популярными".
sample_popular_construction |> 
  hclust() |> 
  as.phylo() %>%
  # `.` is the phylo object; colours come from `colors`, defined outside this chunk.
  plot(main = "Выборка с популярными конструкциями",
       tip.color = colors$color[match(.$tip.label, colors$language)],
       direction = "downwards",
       cex = 1.5, 
       font = 2)

Code
# NeighborNet network for the popular-construction sample.
# The plot title had a typo ("популярынми"); corrected to "популярными".
sample_popular_construction  |> 
  neighborNet() |> 
  plot()
title(main = "Выборка с популярными конструкциями")

МХ: я также думаю что можно попробовать проанализировать только те стимулы где у ВСЕХ языков есть эквиваленты

Всего стимулов 130, но некоторые из них не попали в датасет: 112, 114, 122, 125, 128

Code
# Stimulus numbers that occur with exactly seven distinct languages in `df`:
# first collapse to one row per (stimulus, language), then count rows per stimulus.
to_keep <- df |>
  count(number, language) |>
  count(number) |>
  filter(n == 7) |>
  pull(number)

Вот список стимулов, для которых есть данные всех семи языков (всего таких случаев 83):

Code
# Print the stimulus numbers kept for the common-stimuli analyses.
to_keep
 [1]   1   2   3   4   6   7   8   9  11  12  14  16  17  18  19  22  23  25  26
[20]  27  28  29  31  32  33  34  35  36  38  39  40  41  43  44  46  49  50  51
[39]  53  54  55  57  58  60  63  66  67  68  69  70  71  72  74  75  76  77  78
[58]  79  80  81  83  85  89  90  91  93  94  96  98  99 100 101 102 103 105 106
[77] 108 109 110 117 118 119 121

Или наоборот, вот номера, для которых не во всех языках есть данные:

Code
# Complement of `to_keep`: stimulus numbers that occur with fewer than seven
# distinct languages in `df`. The result is printed, not stored.
df |> 
  count(number, language) |> 
  count(number) |> 
  filter(n < 7) |> 
  pull(number)
 [1]   5  10  13  15  20  21  24  30  37  42  45  47  48  52  56  59  61  62  64
[20]  65  73  82  84  86  87  88  92  95  97 104 107 111 113 115 116 120 123 124
[39] 126 127 129 130

Запустим кластеризацию:

Code
# Binary distances between languages, using only the rows of the wide table
# whose stimulus number is in `to_keep`.
df_wide |> 
  filter(number %in% to_keep) |> 
  # Presence/absence recoding of the language columns: NA -> 0, otherwise 1.
  mutate(across(Guro:Bamana, function(x){ifelse(is.na(x), 0, 1)}))  |> 
  select(Guro:Bamana) |> 
  t() |> 
  dist(method = "binary") ->
  stimuli_sample_all_langs

# Tree from hclust() with its default linkage.
stimuli_sample_all_langs |> 
  hclust() |> 
  as.phylo() %>%
  # `.` is the phylo object; colours come from `colors`, defined outside this chunk.
  plot(main = "Clusterization with common stimuli",
       tip.color = colors$color[match(.$tip.label, colors$language)],
       direction = "downwards",
       cex = 1.5, 
       font = 2)

Code
# NeighborNet network for the common-stimuli sample.
# `to_keep` is built with `n == 7`, i.e. stimuli attested in all seven
# languages, so the title now says 7 instead of the outdated 6.
stimuli_sample_all_langs  |> 
  neighborNet() |> 
  plot()
title(main = "Выборка стимулов со всеми 7 языками")

Code
# Denominator: per language pair, the number of `to_keep` stimuli for which
# both languages have a usable row.
df |> 
  filter(number %in% to_keep,
         !str_detect(construction, "no equivalent"),
         !is.na(construction_type)) |> 
  arrange(language)  |> 
  distinct(language, number, construction_type)  |> 
  pairwise_count(language, number) |> 
  rename(total = n) ->
  df_pairwise_total

# Numerator: per language pair, the number of `to_keep` stimuli on which both
# languages share at least one construction type (each stimulus counted once).
df |> 
  filter(number %in% to_keep,
         !str_detect(construction, "no equivalent"),
         !is.na(construction_type)) |> 
  group_by(number) |> 
  arrange(language) |> 
  pairwise_count(language, construction_type)  |> 
  mutate(n = 1) |> 
  group_by(item1, item2) |> 
  reframe(n = sum(n)) -> 
  df_pairwise_within_construction

# Heatmap of the percentage of agreeing stimuli.
# Explicit join keys; a pair with no agreeing stimulus is shown as 0 %, not NA.
# `to_keep` requires all seven languages (`n == 7`), so the title says 7.
df_pairwise_total |> 
  left_join(df_pairwise_within_construction, by = c("item1", "item2")) |>  
  mutate(percentage = round(coalesce(n, 0)/total*100, 2),
         item1 = case_match(item1, 
                            "Dan_Gweetaa" ~ "Dan Gweetaa",
                            .default = item1),
         item2 = case_match(item2, 
                            "Dan_Gweetaa" ~ "Dan Gweetaa",
                            .default = item2)) |> 
  select(item1, item2, percentage) |> 
  ggplot(aes(item1, item2))+
  geom_tile(aes(fill = percentage), colour = "white") +
  geom_text(aes(label = str_c(percentage, "%")), colour = "white") +
  scale_fill_gradient(low = "lightblue", high = "navy")+
  coord_fixed()+
  labs(x = "", y = "", title = "Выборка стимулов со всеми 7 языками") +
  theme(legend.position = "bottom")

Интересно, что выборка со случайным эквивалентом и выборка только тех стимулов, где есть данные по всем семи языкам, дают такие разные результаты. Что будет, если взять выборку данных по всем языкам и В НЕЙ провести случайную выборку эквивалентной конструкции?

Code
# Combine both restrictions: only `to_keep` stimuli, and one random row per
# (stimulus, language). No seed is set in this chunk, so the draw may differ
# between renders.
df |> 
  filter(!str_detect(construction, "no equivalent"),
         !is.na(construction_type),
         number %in% to_keep) |>
  group_by(number, language) |> 
  slice_sample(n = 1) |> 
  ungroup() |> 
  select(number, language, construction_type) |> 
  # Indicator column: after pivoting, a cell is 1 when the language's sampled
  # row for that stimulus has that construction type, and 0 otherwise.
  mutate(construction_type2 = 1) |> 
  pivot_wider(names_from = language, values_from = construction_type2, values_fill = 0) |> 
  # NOTE(review): `Bamana:Mano` is a positional column range; it presumably
  # covers all language columns -- verify the column order produced by the pivot.
  select(Bamana:Mano)  |> 
  t() |> 
  dist(method = "binary") ->
  stimuli_sample_all_langs_and_random

# Tree from hclust() with its default linkage.
stimuli_sample_all_langs_and_random |> 
  hclust() |> 
  as.phylo()  %>%
  # `.` is the phylo object; colours come from `colors`, defined outside this chunk.
  plot(main = "clusterization with random equivalents and common stimuli",
       tip.color = colors$color[match(.$tip.label, colors$language)],
       direction = "downwards",
       cex = 1.5, 
       font = 2)

Code
# NeighborNet network for the common-stimuli, random-equivalent sample.
# `to_keep` is built with `n == 7`, i.e. stimuli attested in all seven
# languages, so the title now says 7 instead of the outdated 6.
stimuli_sample_all_langs_and_random  |> 
  neighborNet() |> 
  plot()
title(main = "Выборка стимулов со всеми 7 языками  (случайный эквивалент)")

Перед тем, как приступить к перекодированию, которое предложила Маша К., посмотрим статистику конструкций по языкам:

Code
# Frequency of every construction type per language, with an overall total,
# sorted from the most to the least frequent construction.
df |> 
  count(language, construction_type) |>
  # `overall` = sum of the per-language counts of the same construction type.
  group_by(construction_type) |> 
  mutate(overall = sum(n)) |>  
  # One column per language; a language that never uses a construction gets 0.
  pivot_wider(names_from = language, values_from = n, values_fill = 0) |> 
  arrange(desc(overall)) |> 
  select(construction_type, overall, Guro, Dan_Gweetaa, Mano, Kpelle, Kono, Looma, Bamana)
construction_type overall Guro Dan_Gweetaa Mano Kpelle Kono Looma Bamana
X Aux Y V 296 43 37 45 36 32 42 61
X Aux V Y Ad 180 23 22 36 23 20 21 35
X Aux Y N V 100 5 11 35 21 12 9 7
X Aux N V Y Ad 68 12 15 10 6 7 12 6
X Aux V loc 37 6 5 8 7 5 6 0
Xinal N Aux V Y Ad 36 8 3 13 5 2 4 1
X Aux Xrefl N V Y Ad 33 4 2 11 7 5 0 4
Y be X Ad 19 2 1 4 4 4 2 2
Yinal N be X Ad 18 0 2 4 5 4 2 1
Xinal N be Y Ad 16 1 1 5 3 2 3 1
Y Aux V X Ad 16 3 2 2 1 2 2 4
X be Y Ad 15 1 1 2 3 4 1 3
Yinal N Aux V X Ad 13 0 2 7 3 1 0 0
X+Y Aux N V 12 0 0 5 5 2 0 0
Yinal N Aux X V 10 0 7 1 1 1 0 0
X+Y Aux V 9 1 0 0 3 1 1 3
X Aux Xrefl V Y Ad 8 1 1 2 1 1 1 1
Y Aux X N V 8 1 2 2 2 0 0 1
X Aux N V 8 0 1 2 2 2 1 0
Y Aux X V 6 0 1 0 0 0 0 5
Xinal Y Aux V 4 1 1 1 0 0 1 0
Xinal Y Aux Xrefl V 4 0 0 1 2 1 0 0
X+Y be N Ad 3 0 0 0 1 1 0 1
X Aux 3sg V Y Ad 3 1 1 1 0 0 0 0
Yinal N be.neg X Ad 3 0 0 1 1 1 0 0
X+Y Aux V X+Yrefl Ad 3 0 0 3 0 0 0 0
X Aux V Y N Ad 2 0 0 0 0 0 0 2
X V Aux Y Ad 2 0 0 0 0 0 0 2
X be X Ad 2 0 0 0 0 0 0 2
X be Y N Ad 2 0 1 0 0 0 0 1
Xinal Y Aux X V 2 0 1 0 0 0 0 1
Y Aux Y V 2 0 0 0 0 0 0 2
X Aux Xrefl N V Y N Ad 2 0 2 0 0 0 0 0
X be Y 2 1 1 0 0 0 0 0
Z Aux X V Y 2 1 1 0 0 0 0 0
Xinal N Aux N V Y Ad 2 2 0 0 0 0 0 0
X be.neg Y Ad 2 0 0 1 1 0 0 0
N adj be X Ad Vinf Y Ad 1 0 0 0 0 0 0 1
X Aux V Aux V Y Ad 1 0 0 0 0 0 0 1
X Aux Xrefl int V Y Ad 1 0 0 0 0 0 0 1
X Aux Y Ad 1 0 0 0 0 0 0 1
X Aux Y V Refl Ad 1 0 0 0 0 0 0 1
X Aux Z V Y Ad 1 0 0 0 0 0 0 1
Xinal N Aux Y Ad 1 0 0 0 0 0 0 1
Xinal N V Y 1 0 0 0 0 0 0 1
Y Aux V Aux V X Ad 1 0 0 0 0 0 0 1
Y Aux Xal N V 1 0 0 0 0 0 0 1
Yinal N Aux X Ad 1 0 0 0 0 0 0 1
N be X Y Ad 1 0 1 0 0 0 0 0
X Aux 3sg N V Y Ad 1 0 1 0 0 0 0 0
X Aux 3sg Z V Y Ad 1 0 1 0 0 0 0 0
X Aux Yal N V 1 0 1 0 0 0 0 0
X Aux.neg V Y Ad 1 0 1 0 0 0 0 0
Xinal Y N Aux V 1 0 1 0 0 0 0 0
Y Aux V X N Ad N Ad 1 0 1 0 0 0 0 0
Yinal N be X N Ad 1 0 1 0 0 0 0 0
Z Aux V X Y loc 1 0 1 0 0 0 0 0
Z Aux X Y V 1 0 1 0 0 0 0 0
3sg Aux V Y Ad 1 1 0 0 0 0 0 0
Y Aux X N V Xrefl Ad 1 1 0 0 0 0 0 0
Yal X Aux V 1 1 0 0 0 0 0 0
X Aux Yinal N V 1 0 0 0 0 1 0 0
X Aux.Neg V Y Ad 1 0 0 0 0 1 0 0
X N Aux.Neg V Y Ad 1 0 0 0 0 1 0 0
X+Y Aux V Z Ad 1 0 0 0 0 1 0 0
Y N be.neg X Ad 1 0 0 0 0 1 0 0
X+Y Aux X+Yrefl N V 3sg Refl Ad 1 0 0 0 1 0 0 0
X+Y Aux X+Yrefl recp N V 1 0 0 0 1 0 0 0
X Aux N V Y N Ad 1 0 0 0 0 0 1 0
X N Aux V Y Ad 1 0 0 0 0 0 1 0
X int N Aux V Y Ad 1 0 0 0 0 0 1 0
X+Y Aux Ad N Num 1 0 0 0 0 0 1 0
X+Y Aux N V Z Ad 1 0 0 0 0 0 1 0
X+Y Aux V int Ad 1 0 0 0 0 0 1 0
Xal Y be.neg V 1 0 0 0 0 0 1 0
3sg N be.neg>3sg with Y with X Ad 1 0 0 1 0 0 0 0
X Aux Y V adj 1 0 0 1 0 0 0 0
X be adj Y Ad 1 0 0 1 0 0 0 0
X+Y Aux V X+Yrefl recp Ad 1 0 0 1 0 0 0 0
X+Y Aux V adv X+Yrefl recp Ad 1 0 0 1 0 0 0 0
X+Y Aux X+Yrefl N V 1 0 0 1 0 0 0 0
X+Y N Aux V 1 0 0 1 0 0 0 0
Xinal N Aux V Xpron Ad Y Ad 1 0 0 1 0 0 0 0
Xinal N be 1 0 0 1 0 0 0 0
Y N be.neg>3sg with X Ad 1 0 0 1 0 0 0 0
Z Aux 3sg V X Ad Y 1 0 0 1 0 0 0 0

Маша предложила классификацию, которую я попробую воспроизвести.

  1. простая переходная вида X Aux Y V (только этот случай)
  2. простая непереходная X Aux V Y Ad, X Aux V loc, X+Y Aux V, Y Aux V X Ad (по идее исчерпывающий список, а что я забыла?)
  3. конструкция с копулой - можно взять автоматически все случаи, где в формуле есть копула be: Y be X Ad, X be Y Ad, X be adj Y Ad, N be Xgen Y Ad, X+Y be N Ad и т.д.
  4. некопульная конструкция со сложным глаголом - взять автоматически все случаи, где в формуле выполняется два условия: есть Aux (=нет be) и есть хотя бы одно N (вместе переходные, и непереходные) - X Aux N V Y Ad; X Aux Y N V; Xgen N Aux V Y Ad и т.д.

По идее это не исчерпывающий список, останется что-то на периферии вроде X+Y Aux V X+Ypron recp Ad, но это редкие штуки.

Code
# Tabulate every construction type per language and label it with one of the
# coarse classes: transitive, intransitive, copula, complex verb, other.
df |> 
  count(language, construction_type) |>
  mutate(type = case_when(
    # only the bare frame counts as transitive
    construction_type == "X Aux Y V" ~ "transitive",
    # "\\bN\\b" matches N only as a standalone token; a plain "N" also hits
    # the "N" of "Aux.Neg" and labelled "X Aux.Neg V Y Ad" a complex verb
    # while "X Aux.neg V Y Ad" was intransitive
    str_detect(construction_type, "Aux") &
      !str_detect(construction_type, "\\bN\\b") ~ "intransitive",
    str_detect(construction_type, "be") ~ "copula",
    str_detect(construction_type, "Aux") &
      str_detect(construction_type, "\\bN\\b") ~ "complex verb",
    TRUE ~ "other")) |> 
  group_by(construction_type) |> 
  # total frequency of the construction type across all languages
  mutate(overall = sum(n)) |>  
  pivot_wider(names_from = language, values_from = n, values_fill = 0) |> 
  arrange(desc(overall)) |> 
  select(construction_type, type,   overall, Guro, Dan_Gweetaa, Mano, Kpelle, Kono, Looma, Bamana)
construction_type type overall Guro Dan_Gweetaa Mano Kpelle Kono Looma Bamana
X Aux Y V transitive 296 43 37 45 36 32 42 61
X Aux V Y Ad intransitive 180 23 22 36 23 20 21 35
X Aux Y N V complex verb 100 5 11 35 21 12 9 7
X Aux N V Y Ad complex verb 68 12 15 10 6 7 12 6
X Aux V loc intransitive 37 6 5 8 7 5 6 0
Xinal N Aux V Y Ad complex verb 36 8 3 13 5 2 4 1
X Aux Xrefl N V Y Ad complex verb 33 4 2 11 7 5 0 4
Y be X Ad copula 19 2 1 4 4 4 2 2
Yinal N be X Ad copula 18 0 2 4 5 4 2 1
Xinal N be Y Ad copula 16 1 1 5 3 2 3 1
Y Aux V X Ad intransitive 16 3 2 2 1 2 2 4
X be Y Ad copula 15 1 1 2 3 4 1 3
Yinal N Aux V X Ad complex verb 13 0 2 7 3 1 0 0
X+Y Aux N V complex verb 12 0 0 5 5 2 0 0
Yinal N Aux X V complex verb 10 0 7 1 1 1 0 0
X+Y Aux V intransitive 9 1 0 0 3 1 1 3
X Aux Xrefl V Y Ad intransitive 8 1 1 2 1 1 1 1
Y Aux X N V complex verb 8 1 2 2 2 0 0 1
X Aux N V complex verb 8 0 1 2 2 2 1 0
Y Aux X V intransitive 6 0 1 0 0 0 0 5
Xinal Y Aux V intransitive 4 1 1 1 0 0 1 0
Xinal Y Aux Xrefl V intransitive 4 0 0 1 2 1 0 0
X+Y be N Ad copula 3 0 0 0 1 1 0 1
X Aux 3sg V Y Ad intransitive 3 1 1 1 0 0 0 0
Yinal N be.neg X Ad copula 3 0 0 1 1 1 0 0
X+Y Aux V X+Yrefl Ad intransitive 3 0 0 3 0 0 0 0
X Aux V Y N Ad complex verb 2 0 0 0 0 0 0 2
X V Aux Y Ad intransitive 2 0 0 0 0 0 0 2
X be X Ad copula 2 0 0 0 0 0 0 2
X be Y N Ad copula 2 0 1 0 0 0 0 1
Xinal Y Aux X V intransitive 2 0 1 0 0 0 0 1
Y Aux Y V intransitive 2 0 0 0 0 0 0 2
X Aux Xrefl N V Y N Ad complex verb 2 0 2 0 0 0 0 0
X be Y copula 2 1 1 0 0 0 0 0
Z Aux X V Y intransitive 2 1 1 0 0 0 0 0
Xinal N Aux N V Y Ad complex verb 2 2 0 0 0 0 0 0
X be.neg Y Ad copula 2 0 0 1 1 0 0 0
N adj be X Ad Vinf Y Ad copula 1 0 0 0 0 0 0 1
X Aux V Aux V Y Ad intransitive 1 0 0 0 0 0 0 1
X Aux Xrefl int V Y Ad intransitive 1 0 0 0 0 0 0 1
X Aux Y Ad intransitive 1 0 0 0 0 0 0 1
X Aux Y V Refl Ad intransitive 1 0 0 0 0 0 0 1
X Aux Z V Y Ad intransitive 1 0 0 0 0 0 0 1
Xinal N Aux Y Ad complex verb 1 0 0 0 0 0 0 1
Xinal N V Y other 1 0 0 0 0 0 0 1
Y Aux V Aux V X Ad intransitive 1 0 0 0 0 0 0 1
Y Aux Xal N V complex verb 1 0 0 0 0 0 0 1
Yinal N Aux X Ad complex verb 1 0 0 0 0 0 0 1
N be X Y Ad copula 1 0 1 0 0 0 0 0
X Aux 3sg N V Y Ad complex verb 1 0 1 0 0 0 0 0
X Aux 3sg Z V Y Ad intransitive 1 0 1 0 0 0 0 0
X Aux Yal N V complex verb 1 0 1 0 0 0 0 0
X Aux.neg V Y Ad intransitive 1 0 1 0 0 0 0 0
Xinal Y N Aux V complex verb 1 0 1 0 0 0 0 0
Y Aux V X N Ad N Ad complex verb 1 0 1 0 0 0 0 0
Yinal N be X N Ad copula 1 0 1 0 0 0 0 0
Z Aux V X Y loc intransitive 1 0 1 0 0 0 0 0
Z Aux X Y V intransitive 1 0 1 0 0 0 0 0
3sg Aux V Y Ad intransitive 1 1 0 0 0 0 0 0
Y Aux X N V Xrefl Ad complex verb 1 1 0 0 0 0 0 0
Yal X Aux V intransitive 1 1 0 0 0 0 0 0
X Aux Yinal N V complex verb 1 0 0 0 0 1 0 0
X Aux.Neg V Y Ad complex verb 1 0 0 0 0 1 0 0
X N Aux.Neg V Y Ad complex verb 1 0 0 0 0 1 0 0
X+Y Aux V Z Ad intransitive 1 0 0 0 0 1 0 0
Y N be.neg X Ad copula 1 0 0 0 0 1 0 0
X+Y Aux X+Yrefl N V 3sg Refl Ad complex verb 1 0 0 0 1 0 0 0
X+Y Aux X+Yrefl recp N V complex verb 1 0 0 0 1 0 0 0
X Aux N V Y N Ad complex verb 1 0 0 0 0 0 1 0
X N Aux V Y Ad complex verb 1 0 0 0 0 0 1 0
X int N Aux V Y Ad complex verb 1 0 0 0 0 0 1 0
X+Y Aux Ad N Num complex verb 1 0 0 0 0 0 1 0
X+Y Aux N V Z Ad complex verb 1 0 0 0 0 0 1 0
X+Y Aux V int Ad intransitive 1 0 0 0 0 0 1 0
Xal Y be.neg V copula 1 0 0 0 0 0 1 0
3sg N be.neg>3sg with Y with X Ad copula 1 0 0 1 0 0 0 0
X Aux Y V adj intransitive 1 0 0 1 0 0 0 0
X be adj Y Ad copula 1 0 0 1 0 0 0 0
X+Y Aux V X+Yrefl recp Ad intransitive 1 0 0 1 0 0 0 0
X+Y Aux V adv X+Yrefl recp Ad intransitive 1 0 0 1 0 0 0 0
X+Y Aux X+Yrefl N V complex verb 1 0 0 1 0 0 0 0
X+Y N Aux V complex verb 1 0 0 1 0 0 0 0
Xinal N Aux V Xpron Ad Y Ad complex verb 1 0 0 1 0 0 0 0
Xinal N be copula 1 0 0 1 0 0 0 0
Y N be.neg>3sg with X Ad copula 1 0 0 1 0 0 0 0
Z Aux 3sg V X Ad Y intransitive 1 0 0 1 0 0 0 0

Вот, что получилось. Маш, что исправить? Вот саммари:

Code
# Summary: number of observations per coarse construction class and language.
df |> 
  mutate(type = case_when(
    # only the bare frame counts as transitive
    construction_type == "X Aux Y V" ~ "transitive",
    # "\\bN\\b" matches N only as a standalone token; a plain "N" also hits
    # the "N" of "Aux.Neg" and would count such clauses as complex verbs
    str_detect(construction_type, "Aux") &
      !str_detect(construction_type, "\\bN\\b") ~ "intransitive",
    str_detect(construction_type, "be") ~ "copula",
    str_detect(construction_type, "Aux") &
      str_detect(construction_type, "\\bN\\b") ~ "complex verb",
    TRUE ~ "other")) |> 
  count(type, language) |> 
  pivot_wider(names_from = language, values_from = n, values_fill = 0)  |> 
  # rows are ordered by the Looma counts, largest first
  arrange(-Looma) |> 
  select(type, Guro, Dan_Gweetaa, Mano, Kpelle, Kono, Looma, Bamana)
type Guro Dan_Gweetaa Mano Kpelle Kono Looma Bamana
transitive 43 37 45 36 32 42 61
intransitive 39 39 58 37 31 33 59
complex verb 33 49 89 54 35 31 24
copula 5 9 21 18 17 9 12
other 0 0 0 0 0 0 1

Во-вторых, есть отдельная просьба про распределение конструкций и глаголов. В первом листе stimuli_general в столбце B есть семантическая разметка глаголов. Добавь её, пожалуйста, в глагольную таблицу, которую ты будешь снова генерировать. А дальше я бы попросила сделать следующее. Посчитать для каждого глагола, сколько ему в наших языках соответствует разных конструкций из 4 типов, выделенных выше. А потом слить глаголы из одной семантической группы и посчитать для семантических групп вместе статистику по конструкциям. Грубо говоря я хочу проверить, верно ли, что у глаголов вида effect будет больше переходных конструкций, а у feeling больше сложных глаголов, т.к. более абстрактные значения языки склонны выражать метафорически.

Code
# Bar chart of coarse construction classes, faceted by the semantic type of
# the stimulus verb (rows) and by language (columns).
df |> 
  # adds the columns of `stimuli` (semantic_type is used below); the join key
  # is whatever columns the two tables share
  left_join(stimuli) |> 
  mutate(type = case_when(
    # only the bare frame counts as transitive
    construction_type == "X Aux Y V" ~ "transitive",
    # "\\bN\\b" matches N only as a standalone token; a plain "N" also hits
    # the "N" of "Aux.Neg" and would count such clauses as complex verbs
    str_detect(construction_type, "Aux") &
      !str_detect(construction_type, "\\bN\\b") ~ "intransitive",
    str_detect(construction_type, "be") ~ "copula",
    str_detect(construction_type, "Aux") &
      str_detect(construction_type, "\\bN\\b") ~ "complex verb",
    TRUE ~ "other")) |> 
  count(type, language, semantic_type) |> 
  # "Dan_Gweetaa" -> "Dan Gweetaa", then fix the facet order of the languages
  mutate(language = str_replace(language, "_", " "),
         language = factor(language, levels = c("Guro", "Dan Gweetaa", "Mano", "Kpelle", "Kono", "Looma", "Bamana"))) |> 
  ggplot(aes(n, type))+
  geom_col()+
  facet_grid(semantic_type~language, scales = "free")

Ну вот примерно то, что ты, наверное, имела в виду, но это всё нужно пересчитать после того, как мы утвердим тип каждой из конструкций.

Какие адлоги есть для каждого языка?

Code
# Inventory of adpositions per language: a cell may list several adpositions
# separated by ", "; each one is counted on its own row.
df |>
  mutate(adposition = str_split(adposition, pattern = ", ")) |>
  unnest_longer(col = adposition) |>
  group_by(language, adposition) |>
  summarise(n = n(), .groups = "drop") |>
  # drop the rows that stand for observations without an adposition
  na.omit()
language adposition n
Bamana bólo 6
Bamana fɛ̀ 9
Bamana kàn 5
Bamana kɔ́ 4
Bamana kɔ́nɔ 2
Bamana kɔ́rɔ 1
Bamana lá 33
Bamana mà 6
Bamana nɔ̀fɛ̀ 2
Bamana yé 6
Bamana ɲɛ́ 1
Dan_Gweetaa bhàa 2
Dan_Gweetaa bhȁ 23
Dan_Gweetaa dhi̋ɤ 3
Dan_Gweetaa dhɛ̏ 2
Dan_Gweetaa gɔ̏ 5
Dan_Gweetaa gɯ́ 6
Dan_Gweetaa ká 14
Dan_Gweetaa kèŋ̏ 1
Dan_Gweetaa loc 6
Dan_Gweetaa pi̋ɤ 2
Dan_Gweetaa tȁ 4
Dan_Gweetaa zɯ̏ 1
Guro jì 2
Guro la 2
Guro leè 3
Guro loc 6
Guro lɛ 8
Guro ta 4
Guro và 8
Guro ya̰ 9
Guro zì 2
Guro zuo 2
Guro ɓa̰ 20
Kono à 11
Kono hù 5
Kono lale 1
Kono lá 1
Kono loc 5
Kono mà 28
Kono mɛ̌i 1
Kono pòò 1
Kono pɔ̀ 4
Kono pɔ́ɲà 1
Kono tɛ́ɓù 1
Kono yêi 4
Kpelle à 11
Kpelle hù 3
Kpelle lá 1
Kpelle loc 7
Kpelle púlû 4
Kpelle pɔ́ 4
Kpelle yêi 4
Kpelle ɓà 37
Kpelle ɲá 1
Looma bà 7
Looma bù 1
Looma bɛ̀ 5
Looma gà 10
Looma kómá 1
Looma loc 6
Looma mà 20
Looma pòlù 2
Looma sù 3
Looma tá 2
Looma yà 3
Mano bà 6
Mano gé 1
Mano gɛ̀nɛ̀ 1
Mano ká 24
Mano kɛ̀lɛ̀ 7
Mano là 7
Mano loc 8
Mano lɛ̀ɛ̄ 6
Mano mɔ̀ 45
Mano píé 8
Mano yí 1
Mano ŋwɛ́ŋ̀ 4

Какие были раньше проблемы:

  • в коно нет адлогов
  • в дан гвета есть адлоги dhi̋ɤ и dhiɤ̋, bhȁ и bhàa, которые, наверное, про одно и то же. Кроме того есть адлог loc.
  • в гуро есть адлоги léè и leè, lɛ̄ и lɛ, ɓa̰ и ɓā̰. Кроме того есть адлог loc.
  • в лома есть адлоги mà и mà̀ (с двумя знаками тона). Кроме того есть адлог loc.
  • в мано есть адлоги píé и píé (я не понимаю в чем разница…).

Проблемы на 15.02.2023:

  • В гуро ɓa̰ и ɓā̰ — разное?
  • В гуро ya̰ и yā̰ — разное?

Я не совсем понял, что написала МХ, так что я пока посчитал вот такое вот. Какие адлоги из разных языков встречаются друг с другом в одной расширенной конструкции внутри одного стимула?

Code
# Count how often two language-tagged adpositions occur with the same
# construction type within one stimulus, summed over all stimuli.
df |> 
  # keep rows that have an equivalent, a construction type and an adposition
  filter(!(str_detect(construction, "no equivalent") |
             is.na(construction_type) |
             is.na(adposition))) |> 
  group_by(number) |> 
  # tag each adposition with its language, e.g. "Mano__ká"
  mutate(adposition = str_c(language, adposition, sep = "__")) |> 
  pairwise_count(item = adposition, feature = construction_type)  |>
  group_by(item1, item2) |> 
  summarise(n = sum(n), .groups = "drop") |> 
  arrange(desc(n))
item1 item2 n
Kpelle__ɓà Mano__mɔ̀ 24
Mano__mɔ̀ Kpelle__ɓà 24
Kono__mà Kpelle__ɓà 17
Kpelle__ɓà Kono__mà 17
Kono__mà Mano__mɔ̀ 15
Mano__mɔ̀ Kono__mà 15
Kono__à Kpelle__à 11
Kpelle__à Kono__à 11
Bamana__lá Kpelle__ɓà 7
Dan_Gweetaa__bhȁ Kpelle__ɓà 7
Dan_Gweetaa__bhȁ Mano__mɔ̀ 7
Guro__ɓa̰ Kpelle__ɓà 7
Kono__à Mano__ká 7
Kpelle__à Mano__ká 7
Kpelle__ɓà Bamana__lá 7
Kpelle__ɓà Dan_Gweetaa__bhȁ 7
Kpelle__ɓà Guro__ɓa̰ 7
Mano__ká Kono__à 7
Mano__ká Kpelle__à 7
Mano__mɔ̀ Dan_Gweetaa__bhȁ 7
Bamana__lá Mano__mɔ̀ 5
Dan_Gweetaa__loc Guro__loc 5
Dan_Gweetaa__loc Kono__loc 5
Dan_Gweetaa__loc Kpelle__loc 5
Dan_Gweetaa__loc Looma__loc 5
Dan_Gweetaa__loc Mano__loc 5
Guro__loc Dan_Gweetaa__loc 5
Guro__loc Kono__loc 5
Guro__loc Kpelle__loc 5
Guro__loc Looma__loc 5
Guro__loc Mano__loc 5
Guro__ɓa̰ Kono__mà 5
Guro__ɓa̰ Mano__mɔ̀ 5
Kono__loc Dan_Gweetaa__loc 5
Kono__loc Guro__loc 5
Kono__loc Kpelle__loc 5
Kono__loc Looma__loc 5
Kono__loc Mano__loc 5
Kono__mà Guro__ɓa̰ 5
Kono__mà Looma__mà 5
Kpelle__loc Dan_Gweetaa__loc 5
Kpelle__loc Guro__loc 5
Kpelle__loc Kono__loc 5
Kpelle__loc Looma__loc 5
Kpelle__loc Mano__loc 5
Kpelle__ɓà Looma__mà 5
Looma__loc Dan_Gweetaa__loc 5
Looma__loc Guro__loc 5
Looma__loc Kono__loc 5
Looma__loc Kpelle__loc 5
Looma__loc Mano__loc 5
Looma__mà Kono__mà 5
Looma__mà Kpelle__ɓà 5
Looma__mà Mano__mɔ̀ 5
Mano__loc Dan_Gweetaa__loc 5
Mano__loc Guro__loc 5
Mano__loc Kono__loc 5
Mano__loc Kpelle__loc 5
Mano__loc Looma__loc 5
Mano__mɔ̀ Bamana__lá 5
Mano__mɔ̀ Guro__ɓa̰ 5
Mano__mɔ̀ Looma__mà 5
Bamana__lá Guro__ɓa̰ 4
Dan_Gweetaa__bhȁ Kono__mà 4
Dan_Gweetaa__ká Guro__ɓa̰ 4
Guro__ɓa̰ Bamana__lá 4
Guro__ɓa̰ Dan_Gweetaa__ká 4
Kono__mà Dan_Gweetaa__bhȁ 4
Kono__yêi Kpelle__yêi 4
Kpelle__yêi Kono__yêi 4
Kpelle__ɓà Looma__bà 4
Looma__bà Kpelle__ɓà 4
Bamana__lá Dan_Gweetaa__bhȁ 3
Bamana__lá Dan_Gweetaa__ká 3
Bamana__lá Kono__mà 3
Bamana__lá Looma__mà 3
Dan_Gweetaa__bhȁ Bamana__lá 3
Dan_Gweetaa__ká Bamana__lá 3
Dan_Gweetaa__ká Guro__ya̰ 3
Guro__lɛ Kpelle__ɓà 3
Guro__và Kpelle__ɓà 3
Guro__và Looma__mà 3
Guro__và Mano__mɔ̀ 3
Guro__ya̰ Dan_Gweetaa__ká 3
Guro__ya̰ Kono__à 3
Guro__ya̰ Kpelle__à 3
Kono__à Guro__ya̰ 3
Kono__mà Bamana__lá 3
Kono__mà Looma__bà 3
Kono__yêi Mano__kɛ̀lɛ̀ 3
Kpelle__à Guro__ya̰ 3
Kpelle__yêi Mano__kɛ̀lɛ̀ 3
Kpelle__ɓà Guro__lɛ 3
Kpelle__ɓà Guro__và 3
Kpelle__ɓà Mano__ká 3
Looma__bà Kono__mà 3
Looma__bà Mano__mɔ̀ 3
Looma__mà Bamana__lá 3
Looma__mà Guro__và 3
Mano__ká Kpelle__ɓà 3
Mano__kɛ̀lɛ̀ Kono__yêi 3
Mano__kɛ̀lɛ̀ Kpelle__yêi 3
Mano__mɔ̀ Guro__và 3
Mano__mɔ̀ Looma__bà 3
Bamana__bólo Kono__yêi 2
Bamana__bólo Kpelle__yêi 2
Bamana__fɛ̀ Mano__píé 2
Bamana__kɔ́ Guro__zuo 2
Bamana__kɔ́ Kpelle__púlû 2
Bamana__lá Guro__và 2
Bamana__lá Kono__hù 2
Bamana__lá Kpelle__hù 2
Bamana__lá Looma__bà 2
Bamana__lá Mano__ká 2
Bamana__mà Kono__mà 2
Bamana__mà Kpelle__ɓà 2
Bamana__mà Mano__mɔ̀ 2
Bamana__yé Guro__lɛ 2
Bamana__yé Looma__bɛ̀ 2
Dan_Gweetaa__bhȁ Guro__ta 2
Dan_Gweetaa__bhȁ Guro__và 2
Dan_Gweetaa__gɔ̏ Guro__leè 2
Dan_Gweetaa__gɯ́ Kono__mà 2
Dan_Gweetaa__gɯ́ Kpelle__ɓà 2
Dan_Gweetaa__gɯ́ Looma__mà 2
Dan_Gweetaa__gɯ́ Mano__mɔ̀ 2
Dan_Gweetaa__ká Guro__và 2
Dan_Gweetaa__ká Kono__à 2
Dan_Gweetaa__ká Kono__mà 2
Dan_Gweetaa__ká Kpelle__à 2
Dan_Gweetaa__ká Mano__ká 2
Dan_Gweetaa__ká Mano__mɔ̀ 2
Dan_Gweetaa__tȁ Guro__ta 2
Guro__leè Dan_Gweetaa__gɔ̏ 2
Guro__lɛ Bamana__yé 2
Guro__lɛ Mano__lɛ̀ɛ̄ 2
Guro__lɛ Mano__mɔ̀ 2
Guro__ta Dan_Gweetaa__bhȁ 2
Guro__ta Dan_Gweetaa__tȁ 2
Guro__ta Kono__mà 2
Guro__ta Kpelle__ɓà 2
Guro__ta Mano__mɔ̀ 2
Guro__và Bamana__lá 2
Guro__và Dan_Gweetaa__bhȁ 2
Guro__và Dan_Gweetaa__ká 2
Guro__và Kpelle__pɔ́ 2
Guro__và Mano__píé 2
Guro__ya̰ Mano__ká 2
Guro__zuo Bamana__kɔ́ 2
Guro__zuo Kpelle__púlû 2
Guro__ɓa̰ Looma__gà 2
Guro__ɓa̰ Looma__mà 2
Kono__à Dan_Gweetaa__ká 2
Kono__à Looma__gà 2
Kono__hù Bamana__lá 2
Kono__hù Kpelle__hù 2
Kono__mà Bamana__mà 2
Kono__mà Dan_Gweetaa__gɯ́ 2
Kono__mà Dan_Gweetaa__ká 2
Kono__mà Guro__ta 2
Kono__mà Mano__ká 2
Kono__yêi Bamana__bólo 2
Kono__yêi Looma__yà 2
Kpelle__à Dan_Gweetaa__ká 2
Kpelle__à Looma__gà 2
Kpelle__hù Bamana__lá 2
Kpelle__hù Kono__hù 2
Kpelle__púlû Bamana__kɔ́ 2
Kpelle__púlû Guro__zuo 2
Kpelle__púlû Mano__píé 2
Kpelle__pɔ́ Guro__và 2
Kpelle__pɔ́ Mano__píé 2
Kpelle__yêi Bamana__bólo 2
Kpelle__yêi Looma__yà 2
Kpelle__ɓà Bamana__mà 2
Kpelle__ɓà Dan_Gweetaa__gɯ́ 2
Kpelle__ɓà Guro__ta 2
Kpelle__ɓà Looma__gà 2
Looma__bà Bamana__lá 2
Looma__bɛ̀ Bamana__yé 2
Looma__gà Guro__ɓa̰ 2
Looma__gà Kono__à 2
Looma__gà Kpelle__à 2
Looma__gà Kpelle__ɓà 2
Looma__gà Mano__ká 2
Looma__mà Dan_Gweetaa__gɯ́ 2
Looma__mà Guro__ɓa̰ 2
Looma__mà Looma__sù 2
Looma__sù Looma__mà 2
Looma__yà Kono__yêi 2
Looma__yà Kpelle__yêi 2
Looma__yà Mano__kɛ̀lɛ̀ 2
Mano__ká Bamana__lá 2
Mano__ká Dan_Gweetaa__ká 2
Mano__ká Guro__ya̰ 2
Mano__ká Kono__mà 2
Mano__ká Looma__gà 2
Mano__kɛ̀lɛ̀ Looma__yà 2
Mano__lɛ̀ɛ̄ Guro__lɛ 2
Mano__lɛ̀ɛ̄ Mano__píé 2
Mano__mɔ̀ Bamana__mà 2
Mano__mɔ̀ Dan_Gweetaa__gɯ́ 2
Mano__mɔ̀ Dan_Gweetaa__ká 2
Mano__mɔ̀ Guro__lɛ 2
Mano__mɔ̀ Guro__ta 2
Mano__píé Bamana__fɛ̀ 2
Mano__píé Guro__và 2
Mano__píé Kpelle__púlû 2
Mano__píé Kpelle__pɔ́ 2
Mano__píé Mano__lɛ̀ɛ̄ 2
Mano__píé Mano__ŋwɛ́ŋ̀ 2
Mano__ŋwɛ́ŋ̀ Mano__píé 2
Bamana__bólo Bamana__fɛ̀ 1
Bamana__bólo Looma__yà 1
Bamana__bólo Mano__kɛ̀lɛ̀ 1
Bamana__fɛ̀ Bamana__bólo 1
Bamana__fɛ̀ Dan_Gweetaa__bhȁ 1
Bamana__fɛ̀ Dan_Gweetaa__pi̋ɤ 1
Bamana__fɛ̀ Guro__và 1
Bamana__fɛ̀ Kono__pɔ̀ 1
Bamana__fɛ̀ Kpelle__pɔ́ 1
Bamana__fɛ̀ Kpelle__ɓà 1
Bamana__fɛ̀ Looma__bà 1
Bamana__fɛ̀ Looma__tá 1
Bamana__fɛ̀ Mano__lɛ̀ɛ̄ 1
Bamana__fɛ̀ Mano__mɔ̀ 1
Bamana__kàn Bamana__kɔ́ 1
Bamana__kàn Bamana__lá 1
Bamana__kàn Bamana__mà 1
Bamana__kàn Bamana__nɔ̀fɛ̀ 1
Bamana__kàn Dan_Gweetaa__kèŋ̏ 1
Bamana__kàn Dan_Gweetaa__tȁ 1
Bamana__kàn Guro__ta 1
Bamana__kàn Guro__zuo 1
Bamana__kàn Kono__mà 1
Bamana__kàn Kpelle__púlû 1
Bamana__kàn Kpelle__ɓà 1
Bamana__kàn Looma__pòlù 1
Bamana__kàn Mano__mɔ̀ 1
Bamana__kɔ́ Bamana__kàn 1
Bamana__kɔ́ Bamana__lá 1
Bamana__kɔ́ Bamana__nɔ̀fɛ̀ 1
Bamana__kɔ́ Dan_Gweetaa__kèŋ̏ 1
Bamana__kɔ́ Kono__mà 1
Bamana__kɔ́ Kono__pòò 1
Bamana__kɔ́ Kpelle__ɓà 1
Bamana__kɔ́ Looma__mà 1
Bamana__kɔ́ Looma__pòlù 1
Bamana__kɔ́ Mano__mɔ̀ 1
Bamana__kɔ́ Mano__píé 1
Bamana__lá Bamana__kàn 1
Bamana__lá Bamana__kɔ́ 1
Bamana__lá Bamana__nɔ̀fɛ̀ 1
Bamana__lá Dan_Gweetaa__bhàa 1
Bamana__lá Dan_Gweetaa__kèŋ̏ 1
Bamana__lá Guro__lɛ 1
Bamana__lá Guro__ta 1
Bamana__lá Guro__ya̰ 1
Bamana__lá Guro__zuo 1
Bamana__lá Kono__à 1
Bamana__lá Kono__mɛ̌i 1
Bamana__lá Kpelle__à 1
Bamana__lá Kpelle__púlû 1
Bamana__lá Looma__bù 1
Bamana__lá Looma__gà 1
Bamana__lá Looma__pòlù 1
Bamana__lá Mano__bà 1
Bamana__lá Mano__là 1
Bamana__lá Mano__yí 1
Bamana__mà Bamana__kàn 1
Bamana__mà Dan_Gweetaa__tȁ 1
Bamana__mà Guro__ta 1
Bamana__nɔ̀fɛ̀ Bamana__kàn 1
Bamana__nɔ̀fɛ̀ Bamana__kɔ́ 1
Bamana__nɔ̀fɛ̀ Bamana__lá 1
Bamana__nɔ̀fɛ̀ Dan_Gweetaa__bhȁ 1
Bamana__nɔ̀fɛ̀ Dan_Gweetaa__kèŋ̏ 1
Bamana__nɔ̀fɛ̀ Guro__zuo 1
Bamana__nɔ̀fɛ̀ Kono__mà 1
Bamana__nɔ̀fɛ̀ Kpelle__púlû 1
Bamana__nɔ̀fɛ̀ Kpelle__ɓà 1
Bamana__nɔ̀fɛ̀ Looma__bɛ̀ 1
Bamana__nɔ̀fɛ̀ Looma__pòlù 1
Bamana__nɔ̀fɛ̀ Mano__mɔ̀ 1
Bamana__yé Dan_Gweetaa__dhi̋ɤ 1
Bamana__yé Looma__gà 1
Bamana__yé Mano__lɛ̀ɛ̄ 1
Bamana__ɲɛ́ Dan_Gweetaa__gɔ̏ 1
Bamana__ɲɛ́ Guro__leè 1
Bamana__ɲɛ́ Kono__mà 1
Bamana__ɲɛ́ Kpelle__ɓà 1
Bamana__ɲɛ́ Looma__bà 1
Bamana__ɲɛ́ Mano__ká 1
Bamana__ɲɛ́ Mano__lɛ̀ɛ̄ 1
Dan_Gweetaa__bhàa Bamana__lá 1
Dan_Gweetaa__bhàa Kono__hù 1
Dan_Gweetaa__bhàa Kpelle__hù 1
Dan_Gweetaa__bhàa Looma__bù 1
Dan_Gweetaa__bhàa Mano__bà 1
Dan_Gweetaa__bhȁ Bamana__fɛ̀ 1
Dan_Gweetaa__bhȁ Bamana__nɔ̀fɛ̀ 1
Dan_Gweetaa__bhȁ Dan_Gweetaa__tȁ 1
Dan_Gweetaa__bhȁ Guro__ya̰ 1
Dan_Gweetaa__bhȁ Guro__zì 1
Dan_Gweetaa__bhȁ Guro__ɓa̰ 1
Dan_Gweetaa__bhȁ Kono__à 1
Dan_Gweetaa__bhȁ Kono__hù 1
Dan_Gweetaa__bhȁ Kpelle__à 1
Dan_Gweetaa__bhȁ Looma__bà 1
Dan_Gweetaa__bhȁ Looma__bɛ̀ 1
Dan_Gweetaa__bhȁ Looma__gà 1
Dan_Gweetaa__bhȁ Looma__mà 1
Dan_Gweetaa__bhȁ Looma__sù 1
Dan_Gweetaa__bhȁ Mano__bà 1
Dan_Gweetaa__bhȁ Mano__là 1
Dan_Gweetaa__dhi̋ɤ Bamana__yé 1
Dan_Gweetaa__dhɛ̏ Mano__lɛ̀ɛ̄ 1
Dan_Gweetaa__dhɛ̏ Mano__píé 1
Dan_Gweetaa__gɔ̏ Bamana__ɲɛ́ 1
Dan_Gweetaa__gɔ̏ Kono__mà 1
Dan_Gweetaa__gɔ̏ Kono__yêi 1
Dan_Gweetaa__gɔ̏ Kpelle__yêi 1
Dan_Gweetaa__gɔ̏ Kpelle__ɓà 1
Dan_Gweetaa__gɔ̏ Looma__bà 1
Dan_Gweetaa__gɔ̏ Looma__yà 1
Dan_Gweetaa__gɔ̏ Mano__gɛ̀nɛ̀ 1
Dan_Gweetaa__gɔ̏ Mano__ká 1
Dan_Gweetaa__gɔ̏ Mano__kɛ̀lɛ̀ 1
Dan_Gweetaa__gɔ̏ Mano__lɛ̀ɛ̄ 1
Dan_Gweetaa__gɯ́ Guro__và 1
Dan_Gweetaa__gɯ́ Guro__ɓa̰ 1
Dan_Gweetaa__gɯ́ Looma__sù 1
Dan_Gweetaa__gɯ́ Mano__píé 1
Dan_Gweetaa__gɯ́ Mano__ŋwɛ́ŋ̀ 1
Dan_Gweetaa__ká Dan_Gweetaa__tȁ 1
Dan_Gweetaa__ká Kpelle__ɓà 1
Dan_Gweetaa__ká Looma__bà 1
Dan_Gweetaa__ká Looma__gà 1
Dan_Gweetaa__ká Looma__tá 1
Dan_Gweetaa__kèŋ̏ Bamana__kàn 1
Dan_Gweetaa__kèŋ̏ Bamana__kɔ́ 1
Dan_Gweetaa__kèŋ̏ Bamana__lá 1
Dan_Gweetaa__kèŋ̏ Bamana__nɔ̀fɛ̀ 1
Dan_Gweetaa__kèŋ̏ Guro__zuo 1
Dan_Gweetaa__kèŋ̏ Kpelle__púlû 1
Dan_Gweetaa__kèŋ̏ Looma__pòlù 1
Dan_Gweetaa__pi̋ɤ Bamana__fɛ̀ 1
Dan_Gweetaa__pi̋ɤ Guro__và 1
Dan_Gweetaa__pi̋ɤ Kono__pɔ̀ 1
Dan_Gweetaa__pi̋ɤ Kpelle__pɔ́ 1
Dan_Gweetaa__pi̋ɤ Mano__píé 1
Dan_Gweetaa__tȁ Bamana__kàn 1
Dan_Gweetaa__tȁ Bamana__mà 1
Dan_Gweetaa__tȁ Dan_Gweetaa__bhȁ 1
Dan_Gweetaa__tȁ Dan_Gweetaa__ká 1
Dan_Gweetaa__tȁ Guro__jì 1
Dan_Gweetaa__tȁ Kono__hù 1
Dan_Gweetaa__tȁ Kono__mà 1
Dan_Gweetaa__tȁ Kpelle__ɓà 1
Dan_Gweetaa__tȁ Looma__mà 1
Dan_Gweetaa__tȁ Mano__là 1
Dan_Gweetaa__tȁ Mano__mɔ̀ 1
Dan_Gweetaa__zɯ̏ Guro__zì 1
Dan_Gweetaa__zɯ̏ Kono__mà 1
Dan_Gweetaa__zɯ̏ Looma__mà 1
Dan_Gweetaa__zɯ̏ Mano__mɔ̀ 1
Guro__jì Dan_Gweetaa__tȁ 1
Guro__jì Looma__mà 1
Guro__leè Bamana__ɲɛ́ 1
Guro__leè Kono__mà 1
Guro__leè Kono__yêi 1
Guro__leè Kpelle__yêi 1
Guro__leè Kpelle__ɓà 1
Guro__leè Looma__bà 1
Guro__leè Looma__yà 1
Guro__leè Mano__ká 1
Guro__leè Mano__kɛ̀lɛ̀ 1
Guro__leè Mano__lɛ̀ɛ̄ 1
Guro__lɛ Bamana__lá 1
Guro__lɛ Kono__mà 1
Guro__lɛ Kono__mɛ̌i 1
Guro__lɛ Looma__bɛ̀ 1
Guro__lɛ Mano__là 1
Guro__ta Bamana__kàn 1
Guro__ta Bamana__lá 1
Guro__ta Bamana__mà 1
Guro__ta Kono__hù 1
Guro__ta Looma__bà 1
Guro__ta Mano__là 1
Guro__và Bamana__fɛ̀ 1
Guro__và Dan_Gweetaa__gɯ́ 1
Guro__và Dan_Gweetaa__pi̋ɤ 1
Guro__và Guro__ya̰ 1
Guro__và Guro__ɓa̰ 1
Guro__và Kono__mà 1
Guro__và Kono__pɔ̀ 1
Guro__và Looma__bà 1
Guro__và Mano__ŋwɛ́ŋ̀ 1
Guro__ya̰ Bamana__lá 1
Guro__ya̰ Dan_Gweetaa__bhȁ 1
Guro__ya̰ Guro__và 1
Guro__ya̰ Looma__gà 1
Guro__ya̰ Looma__tá 1
Guro__ya̰ Mano__mɔ̀ 1
Guro__zì Dan_Gweetaa__bhȁ 1
Guro__zì Dan_Gweetaa__zɯ̏ 1
Guro__zì Kono__mà 1
Guro__zì Looma__mà 1
Guro__zì Mano__mɔ̀ 1
Guro__zuo Bamana__kàn 1
Guro__zuo Bamana__lá 1
Guro__zuo Bamana__nɔ̀fɛ̀ 1
Guro__zuo Dan_Gweetaa__kèŋ̏ 1
Guro__zuo Kono__pòò 1
Guro__zuo Looma__pòlù 1
Guro__zuo Mano__píé 1
Guro__ɓa̰ Dan_Gweetaa__bhȁ 1
Guro__ɓa̰ Dan_Gweetaa__gɯ́ 1
Guro__ɓa̰ Guro__và 1
Guro__ɓa̰ Looma__bà 1
Guro__ɓa̰ Looma__kómá 1
Guro__ɓa̰ Mano__bà 1
Guro__ɓa̰ Mano__ká 1
Guro__ɓa̰ Mano__píé 1
Guro__ɓa̰ Mano__ŋwɛ́ŋ̀ 1
Kono__à Bamana__lá 1
Kono__à Dan_Gweetaa__bhȁ 1
Kono__à Looma__tá 1
Kono__à Mano__mɔ̀ 1
Kono__hù Dan_Gweetaa__bhàa 1
Kono__hù Dan_Gweetaa__bhȁ 1
Kono__hù Dan_Gweetaa__tȁ 1
Kono__hù Guro__ta 1
Kono__hù Looma__bù 1
Kono__hù Looma__mà 1
Kono__hù Mano__bà 1
Kono__hù Mano__ká 1
Kono__hù Mano__là 1
Kono__hù Mano__yí 1
Kono__lá Kpelle__lá 1
Kono__lá Looma__yà 1
Kono__lá Mano__là 1
Kono__mà Bamana__kàn 1
Kono__mà Bamana__kɔ́ 1
Kono__mà Bamana__nɔ̀fɛ̀ 1
Kono__mà Bamana__ɲɛ́ 1
Kono__mà Dan_Gweetaa__gɔ̏ 1
Kono__mà Dan_Gweetaa__tȁ 1
Kono__mà Dan_Gweetaa__zɯ̏ 1
Kono__mà Guro__leè 1
Kono__mà Guro__lɛ 1
Kono__mà Guro__và 1
Kono__mà Guro__zì 1
Kono__mà Kpelle__hù 1
Kono__mà Looma__bɛ̀ 1
Kono__mà Looma__gà 1
Kono__mà Mano__bà 1
Kono__mà Mano__lɛ̀ɛ̄ 1
Kono__mà Mano__píé 1
Kono__mà Mano__ŋwɛ́ŋ̀ 1
Kono__mɛ̌i Bamana__lá 1
Kono__mɛ̌i Guro__lɛ 1
Kono__mɛ̌i Kpelle__ɓà 1
Kono__mɛ̌i Mano__là 1
Kono__pòò Bamana__kɔ́ 1
Kono__pòò Guro__zuo 1
Kono__pòò Kpelle__púlû 1
Kono__pòò Mano__píé 1
Kono__pɔ̀ Bamana__fɛ̀ 1
Kono__pɔ̀ Dan_Gweetaa__pi̋ɤ 1
Kono__pɔ̀ Guro__và 1
Kono__pɔ̀ Kpelle__pɔ́ 1
Kono__pɔ̀ Kpelle__ɓà 1
Kono__pɔ̀ Mano__mɔ̀ 1
Kono__pɔ̀ Mano__píé 1
Kono__pɔ́ɲà Kpelle__púlû 1
Kono__yêi Dan_Gweetaa__gɔ̏ 1
Kono__yêi Guro__leè 1
Kpelle__à Bamana__lá 1
Kpelle__à Dan_Gweetaa__bhȁ 1
Kpelle__à Looma__tá 1
Kpelle__à Mano__mɔ̀ 1
Kpelle__hù Dan_Gweetaa__bhàa 1
Kpelle__hù Kono__mà 1
Kpelle__hù Looma__bù 1
Kpelle__hù Looma__mà 1
Kpelle__hù Mano__bà 1
Kpelle__hù Mano__ká 1
Kpelle__hù Mano__yí 1
Kpelle__lá Kono__lá 1
Kpelle__lá Looma__yà 1
Kpelle__lá Mano__là 1
Kpelle__púlû Bamana__kàn 1
Kpelle__púlû Bamana__lá 1
Kpelle__púlû Bamana__nɔ̀fɛ̀ 1
Kpelle__púlû Dan_Gweetaa__kèŋ̏ 1
Kpelle__púlû Kono__pòò 1
Kpelle__púlû Kono__pɔ́ɲà 1
Kpelle__púlû Looma__pòlù 1
Kpelle__pɔ́ Bamana__fɛ̀ 1
Kpelle__pɔ́ Dan_Gweetaa__pi̋ɤ 1
Kpelle__pɔ́ Kono__pɔ̀ 1
Kpelle__pɔ́ Looma__mà 1
Kpelle__yêi Dan_Gweetaa__gɔ̏ 1
Kpelle__yêi Guro__leè 1
Kpelle__ɓà Bamana__fɛ̀ 1
Kpelle__ɓà Bamana__kàn 1
Kpelle__ɓà Bamana__kɔ́ 1
Kpelle__ɓà Bamana__nɔ̀fɛ̀ 1
Kpelle__ɓà Bamana__ɲɛ́ 1
Kpelle__ɓà Dan_Gweetaa__gɔ̏ 1
Kpelle__ɓà Dan_Gweetaa__ká 1
Kpelle__ɓà Dan_Gweetaa__tȁ 1
Kpelle__ɓà Guro__leè 1
Kpelle__ɓà Kono__mɛ̌i 1
Kpelle__ɓà Kono__pɔ̀ 1
Kpelle__ɓà Looma__bɛ̀ 1
Kpelle__ɓà Mano__bà 1
Kpelle__ɓà Mano__kɛ̀lɛ̀ 1
Kpelle__ɓà Mano__là 1
Kpelle__ɓà Mano__lɛ̀ɛ̄ 1
Kpelle__ɓà Mano__píé 1
Kpelle__ɓà Mano__ŋwɛ́ŋ̀ 1
Looma__bà Bamana__fɛ̀ 1
Looma__bà Bamana__ɲɛ́ 1
Looma__bà Dan_Gweetaa__bhȁ 1
Looma__bà Dan_Gweetaa__gɔ̏ 1
Looma__bà Dan_Gweetaa__ká 1
Looma__bà Guro__leè 1
Looma__bà Guro__ta 1
Looma__bà Guro__và 1
Looma__bà Guro__ɓa̰ 1
Looma__bà Mano__ká 1
Looma__bà Mano__lɛ̀ɛ̄ 1
Looma__bù Bamana__lá 1
Looma__bù Dan_Gweetaa__bhàa 1
Looma__bù Kono__hù 1
Looma__bù Kpelle__hù 1
Looma__bù Mano__bà 1
Looma__bɛ̀ Bamana__nɔ̀fɛ̀ 1
Looma__bɛ̀ Dan_Gweetaa__bhȁ 1
Looma__bɛ̀ Guro__lɛ 1
Looma__bɛ̀ Kono__mà 1
Looma__bɛ̀ Kpelle__ɓà 1
Looma__bɛ̀ Mano__ká 1
Looma__bɛ̀ Mano__mɔ̀ 1
Looma__gà Bamana__lá 1
Looma__gà Bamana__yé 1
Looma__gà Dan_Gweetaa__bhȁ 1
Looma__gà Dan_Gweetaa__ká 1
Looma__gà Guro__ya̰ 1
Looma__gà Kono__mà 1
Looma__gà Mano__bà 1
Looma__kómá Guro__ɓa̰ 1
Looma__mà Bamana__kɔ́ 1
Looma__mà Dan_Gweetaa__bhȁ 1
Looma__mà Dan_Gweetaa__tȁ 1
Looma__mà Dan_Gweetaa__zɯ̏ 1
Looma__mà Guro__jì 1
Looma__mà Guro__zì 1
Looma__mà Kono__hù 1
Looma__mà Kpelle__hù 1
Looma__mà Kpelle__pɔ́ 1
Looma__mà Mano__ká 1
Looma__mà Mano__kɛ̀lɛ̀ 1
Looma__mà Mano__píé 1
Looma__mà Mano__yí 1
Looma__mà Mano__ŋwɛ́ŋ̀ 1
Looma__pòlù Bamana__kàn 1
Looma__pòlù Bamana__kɔ́ 1
Looma__pòlù Bamana__lá 1
Looma__pòlù Bamana__nɔ̀fɛ̀ 1
Looma__pòlù Dan_Gweetaa__kèŋ̏ 1
Looma__pòlù Guro__zuo 1
Looma__pòlù Kpelle__púlû 1
Looma__sù Dan_Gweetaa__bhȁ 1
Looma__sù Dan_Gweetaa__gɯ́ 1
Looma__tá Bamana__fɛ̀ 1
Looma__tá Dan_Gweetaa__ká 1
Looma__tá Guro__ya̰ 1
Looma__tá Kono__à 1
Looma__tá Kpelle__à 1
Looma__tá Mano__ká 1
Looma__tá Mano__lɛ̀ɛ̄ 1
Looma__tá Mano__píé 1
Looma__yà Bamana__bólo 1
Looma__yà Dan_Gweetaa__gɔ̏ 1
Looma__yà Guro__leè 1
Looma__yà Kono__lá 1
Looma__yà Kpelle__lá 1
Looma__yà Mano__là 1
Mano__bà Bamana__lá 1
Mano__bà Dan_Gweetaa__bhàa 1
Mano__bà Dan_Gweetaa__bhȁ 1
Mano__bà Guro__ɓa̰ 1
Mano__bà Kono__hù 1
Mano__bà Kono__mà 1
Mano__bà Kpelle__hù 1
Mano__bà Kpelle__ɓà 1
Mano__bà Looma__bù 1
Mano__bà Looma__gà 1
Mano__gɛ̀nɛ̀ Dan_Gweetaa__gɔ̏ 1
Mano__ká Bamana__ɲɛ́ 1
Mano__ká Dan_Gweetaa__gɔ̏ 1
Mano__ká Guro__leè 1
Mano__ká Guro__ɓa̰ 1
Mano__ká Kono__hù 1
Mano__ká Kpelle__hù 1
Mano__ká Looma__bà 1
Mano__ká Looma__bɛ̀ 1
Mano__ká Looma__mà 1
Mano__ká Looma__tá 1
Mano__ká Mano__kɛ̀lɛ̀ 1
Mano__ká Mano__lɛ̀ɛ̄ 1
Mano__ká Mano__yí 1
Mano__ká Mano__ŋwɛ́ŋ̀ 1
Mano__kɛ̀lɛ̀ Bamana__bólo 1
Mano__kɛ̀lɛ̀ Dan_Gweetaa__gɔ̏ 1
Mano__kɛ̀lɛ̀ Guro__leè 1
Mano__kɛ̀lɛ̀ Kpelle__ɓà 1
Mano__kɛ̀lɛ̀ Looma__mà 1
Mano__kɛ̀lɛ̀ Mano__ká 1
Mano__là Bamana__lá 1
Mano__là Dan_Gweetaa__bhȁ 1
Mano__là Dan_Gweetaa__tȁ 1
Mano__là Guro__lɛ 1
Mano__là Guro__ta 1
Mano__là Kono__hù 1
Mano__là Kono__lá 1
Mano__là Kono__mɛ̌i 1
Mano__là Kpelle__lá 1
Mano__là Kpelle__ɓà 1
Mano__là Looma__yà 1
Mano__lɛ̀ɛ̄ Bamana__fɛ̀ 1
Mano__lɛ̀ɛ̄ Bamana__yé 1
Mano__lɛ̀ɛ̄ Bamana__ɲɛ́ 1
Mano__lɛ̀ɛ̄ Dan_Gweetaa__dhɛ̏ 1
Mano__lɛ̀ɛ̄ Dan_Gweetaa__gɔ̏ 1
Mano__lɛ̀ɛ̄ Guro__leè 1
Mano__lɛ̀ɛ̄ Kono__mà 1
Mano__lɛ̀ɛ̄ Kpelle__ɓà 1
Mano__lɛ̀ɛ̄ Looma__bà 1
Mano__lɛ̀ɛ̄ Looma__tá 1
Mano__lɛ̀ɛ̄ Mano__ká 1
Mano__mɔ̀ Bamana__fɛ̀ 1
Mano__mɔ̀ Bamana__kàn 1
Mano__mɔ̀ Bamana__kɔ́ 1
Mano__mɔ̀ Bamana__nɔ̀fɛ̀ 1
Mano__mɔ̀ Dan_Gweetaa__tȁ 1
Mano__mɔ̀ Dan_Gweetaa__zɯ̏ 1
Mano__mɔ̀ Guro__ya̰ 1
Mano__mɔ̀ Guro__zì 1
Mano__mɔ̀ Kono__à 1
Mano__mɔ̀ Kono__pɔ̀ 1
Mano__mɔ̀ Kpelle__à 1
Mano__mɔ̀ Looma__bɛ̀ 1
Mano__mɔ̀ Mano__píé 1
Mano__mɔ̀ Mano__ŋwɛ́ŋ̀ 1
Mano__píé Bamana__kɔ́ 1
Mano__píé Dan_Gweetaa__dhɛ̏ 1
Mano__píé Dan_Gweetaa__gɯ́ 1
Mano__píé Dan_Gweetaa__pi̋ɤ 1
Mano__píé Guro__zuo 1
Mano__píé Guro__ɓa̰ 1
Mano__píé Kono__mà 1
Mano__píé Kono__pòò 1
Mano__píé Kono__pɔ̀ 1
Mano__píé Kpelle__ɓà 1
Mano__píé Looma__mà 1
Mano__píé Looma__tá 1
Mano__píé Mano__mɔ̀ 1
Mano__yí Bamana__lá 1
Mano__yí Kono__hù 1
Mano__yí Kpelle__hù 1
Mano__yí Looma__mà 1
Mano__yí Mano__ká 1
Mano__ŋwɛ́ŋ̀ Dan_Gweetaa__gɯ́ 1
Mano__ŋwɛ́ŋ̀ Guro__và 1
Mano__ŋwɛ́ŋ̀ Guro__ɓa̰ 1
Mano__ŋwɛ́ŋ̀ Kono__mà 1
Mano__ŋwɛ́ŋ̀ Kpelle__ɓà 1
Mano__ŋwɛ́ŋ̀ Looma__mà 1
Mano__ŋwɛ́ŋ̀ Mano__ká 1
Mano__ŋwɛ́ŋ̀ Mano__mɔ̀ 1

Я попробовал сделать таблицу, но она получается огромная… Возьмем только примеры, где больше одного:

Code
# Heatmap of adposition pairs that share a construction type within a
# stimulus; only pairs observed more than once are drawn.
df |> 
  # keep rows that have an equivalent, a construction type and an adposition
  filter(!(str_detect(construction, "no equivalent") |
             is.na(construction_type) |
             is.na(adposition))) |>  
  group_by(number) |> 
  # tag each adposition with its language, e.g. "Mano__ká"
  mutate(adposition = str_c(language, adposition, sep = "__")) |> 
  pairwise_count(item = adposition, feature = construction_type) |> 
  group_by(item1, item2) |> 
  summarise(n = sum(n), .groups = "drop") |> 
  filter(n > 1) |> 
  # order both axes by pair frequency, most frequent first
  mutate(across(c(item1, item2),
                \(pair) fct_reorder(pair, n, .desc = TRUE))) |> 
  ggplot(aes(x = item1, y = item2)) +
  geom_tile(aes(fill = n), colour = "white") +
  geom_text(aes(label = n), colour = "white") +
  scale_fill_gradient(low = "tomato", high = "darkred") +
  coord_fixed() +
  labs(x = "", y = "") +
  theme(legend.position = "bottom",
        axis.text.x = element_text(angle = 90, vjust = 0, hjust=1))

Вот так я понял Машину идею про то, как посмотреть соответствия между языками, берите лупу:

Code
# For each adposition of language1 (facet rows) show which adpositions of
# language2 (facet columns) share a construction type with it in a stimulus.
df |> 
  # keep rows that have an equivalent, a construction type and an adposition
  filter(!(str_detect(construction, "no equivalent") |
             is.na(construction_type) |
             is.na(adposition))) |>  
  group_by(number) |> 
  # tag each adposition with its language, e.g. "Mano__ká"
  mutate(adposition = str_c(language, adposition, sep = "__")) |> 
  pairwise_count(item = adposition, feature = construction_type) |> 
  group_by(item1, item2) |> 
  summarise(n = sum(n), .groups = "drop") |> 
  filter(n > 1) |> 
  # split the "language__adposition" tags back into two columns each
  separate(item1, into = c("language1", "adposition1"), sep = "__") |> 
  separate(item2, into = c("language2", "adposition2"), sep = "__") |> 
  group_by(language1, language2, adposition1) |> 
  # share of each language2 adposition among the retained (n > 1) pairs of
  # the given language1 adposition
  reframe(ratio = n/sum(n),
          n = n,
          adposition2 = adposition2) |> 
  distinct() |> 
  # "Dan_Gweetaa" -> "Dan Gweetaa", then fix the facet order of the languages
  mutate(across(c(language1, language2),
                \(lang) factor(str_replace(lang, "_", " "),
                               levels = c("Guro", "Dan Gweetaa", "Mano", "Kpelle", "Kono", "Looma", "Bamana")))) |> 
  ggplot(aes(x = ratio, y = adposition1,
             label = str_c(adposition2, "\n",
                           round(ratio*100), "% (",
                           n, ")"))) +
  geom_col(color = "white") +
  geom_label(position = position_stack(vjust = .5), size = 2) +
  facet_grid(language1~language2, scales = "free") +
  labs(y = "")

Все примеры, без фильтрации

Code
# Cluster the languages by shared verb etymologies, using every stimulus.
et <- readxl::read_xlsx("../GM_merged_wide_verb_MK_0927.xlsx")
et |> 
  select(number, predicate_eng, stimuli, ends_with("etym")) |> 
  pivot_longer(cols = ends_with("etym"),
               names_to = "source", 
               values_to = "value") |> 
  na.omit() |>
  filter(value != "0") |> 
  distinct() |> 
  # The replacements run in the order listed: commas inside the known glosses
  # are first masked as "|||", the remaining commas become the ";" separator,
  # and finally the masked commas are restored.
  mutate(value = str_replace_all(value,
                                 c("able, be" = "able||| be",
                                   "bear, give" = "bear||| give",
                                   ", shut" = "||| shut",
                                   "cry, weep" = "cry||| weep",
                                   "shoot, sting" = "shoot||| sting",
                                   "stuck, get" = "stuck||| get",
                                   "," = ";",
                                   "\\|\\|\\|" = ",")),
         value = str_split(value, ";")) |> 
  unnest_longer(col = value) |> 
  distinct() |> 
  # presence/absence matrix: one row per (stimulus, etymology), one 0/1
  # column per language
  mutate(value2 = 1) |>  
  pivot_wider(names_from = source, values_from = value2, values_fill = 0) |> 
  select(ends_with("etym")) |> 
  rename(Guro = Guro_etym,
         Dan_Gweetaa = Dan_etym,
         Mano = Mano_etym,
         Kpelle = Kpelle_etym,
         Kono = Kono_etym,
         Looma = Looma_etym,
         Bamana = Bamana_etym) |> 
  # languages become rows so that dist() compares languages
  t() |> 
  dist(method = "binary") |> 
  hclust() |> 
  as.phylo()  %>%
  plot(main = "Verb etymology (all stimuli)",
       # colour each tip by looking its label up in `colors`
       tip.color = colors$color[match(.$tip.label, colors$language)],
       direction = "downwards",
       cex = 1.5, 
       font = 2)

Только стимулы, для которых есть все языки

Code
# Cluster the languages by shared verb etymologies, restricted to the
# stimuli listed in `to_keep`.
et |> 
  select(number, predicate_eng, stimuli, ends_with("etym")) |> 
  pivot_longer(cols = ends_with("etym"),
               names_to = "source", 
               values_to = "value") |> 
  filter(number %in% to_keep) |> 
  distinct() |> 
  # The replacements run in the order listed: commas inside the known glosses
  # are first masked as "|||", the remaining commas become the ";" separator,
  # and finally the masked commas are restored.
  mutate(value = str_replace_all(value,
                                 c("able, be" = "able||| be",
                                   "bear, give" = "bear||| give",
                                   ", shut" = "||| shut",
                                   "cry, weep" = "cry||| weep",
                                   "shoot, sting" = "shoot||| sting",
                                   "stuck, get" = "stuck||| get",
                                   "," = ";",
                                   "\\|\\|\\|" = ",")),
         value = str_split(value, ";")) |> 
  unnest_longer(col = value) |> 
  distinct() |>
  # removes the "0" placeholders; rows with a missing value are dropped too,
  # because filter() discards rows whose condition is NA
  filter(value != "0") |> 
  # presence/absence matrix: one row per (stimulus, etymology), one 0/1
  # column per language
  mutate(value2 = 1) |>  
  pivot_wider(names_from = source, values_from = value2, values_fill = 0) |> 
  select(ends_with("etym")) |> 
  rename(Guro = Guro_etym,
         Dan_Gweetaa = Dan_etym,
         Mano = Mano_etym,
         Kpelle = Kpelle_etym,
         Kono = Kono_etym,
         Looma = Looma_etym,
         Bamana = Bamana_etym) |> 
  # languages become rows so that dist() compares languages
  t() |> 
  dist(method = "binary") |> 
  hclust() |> 
  as.phylo()  %>%
  plot(main = "Verb etymology with common stimuli",
       # colour each tip by looking its label up in `colors`
       tip.color = colors$color[match(.$tip.label, colors$language)],
       direction = "downwards",
       cex = 1.5, 
       font = 2)

Только стимулы, для которых есть все языки, случайная этимология для каждого языка

Code
# One random draw: within the stimuli listed in `to_keep`, a single row is
# picked at random for every (number, predicate_eng, source) group, then the
# languages are clustered on the etymons they share. No seed is set in this
# chunk, so each run can give a different tree.
et |> 
  select(number, predicate_eng, stimuli, ends_with("etym")) |> 
  pivot_longer(names_to = "source", 
               values_to = "value", 
               ends_with("etym")) |> 
  na.omit() |>
  filter(number %in% to_keep) |>
  group_by(number, predicate_eng, source) |> 
  slice_sample(n = 1)  |> 
  distinct() |> 
  ungroup() |> 
  # Commas that belong to a gloss ("able, be", "cry, weep", ...) are masked
  # with "|||" so that only the remaining commas are turned into ";" and split
  # on; the mask is converted back to a comma afterwards.
  mutate(value = str_replace_all(value, "able, be", "able||| be"),
         value = str_replace_all(value, "bear, give", "bear||| give"),
         value = str_replace_all(value, ", shut", "||| shut"),
         value = str_replace_all(value, "cry, weep", "cry||| weep"),
         value = str_replace_all(value, "shoot, sting", "shoot||| sting"),
         value = str_replace_all(value, "stuck, get", "stuck||| get"),
         value = str_replace_all(value, ",", ";"),
         value = str_replace_all(value, "\\|\\|\\|", ","),
         value = str_split(value, ";")) |> 
  unnest_longer(value) |> 
  # Splitting "a, b" yields "a" and " b"; trim the tokens so the same etymon is
  # not counted as two different values and " 0" is caught by the filter below.
  mutate(value = str_trim(value)) |> 
  distinct() |>
  # NOTE(review): "0" presumably marks a missing etymology - confirm.
  filter(value != "0") |> 
  mutate(value2 = 1) |>  
  pivot_wider(names_from = source, values_from = value2, values_fill = 0) |> 
  select(ends_with("etym")) |> 
  rename(Guro = Guro_etym,
         Looma = Looma_etym,
         Mano = Mano_etym,
         Kono = Kono_etym,
         Dan_Gweetaa = Dan_etym,
         Bamana = Bamana_etym,
         Kpelle = Kpelle_etym) |> 
  # Transpose so that languages are the rows compared by dist().
  t() |> 
  dist(method = "binary") |> 
  hclust() |> 
  as.phylo()  %>%
  # `.` is the phylo object; tip colours are looked up in `colors` by language.
  # The title previously repeated "and common stimuli" twice.
  plot(main = "Verb etymology with random equivalents and common stimuli",
       tip.color = colors$color[match(.$tip.label, colors$language)],
       direction = "downwards",
       cex = 1.5, 
       font = 2)

Code
# One random draw: within the stimuli listed in `to_keep`, a single row is
# picked at random for every (number, predicate_eng, source) group, then the
# languages are clustered on the etymons they share. No seed is set in this
# chunk, so each run can give a different tree.
et |> 
  select(number, predicate_eng, stimuli, ends_with("etym")) |> 
  pivot_longer(names_to = "source", 
               values_to = "value", 
               ends_with("etym")) |> 
  na.omit() |>
  filter(number %in% to_keep) |>
  group_by(number, predicate_eng, source) |> 
  slice_sample(n = 1)  |> 
  distinct() |> 
  ungroup() |> 
  # Commas that belong to a gloss ("able, be", "cry, weep", ...) are masked
  # with "|||" so that only the remaining commas are turned into ";" and split
  # on; the mask is converted back to a comma afterwards.
  mutate(value = str_replace_all(value, "able, be", "able||| be"),
         value = str_replace_all(value, "bear, give", "bear||| give"),
         value = str_replace_all(value, ", shut", "||| shut"),
         value = str_replace_all(value, "cry, weep", "cry||| weep"),
         value = str_replace_all(value, "shoot, sting", "shoot||| sting"),
         value = str_replace_all(value, "stuck, get", "stuck||| get"),
         value = str_replace_all(value, ",", ";"),
         value = str_replace_all(value, "\\|\\|\\|", ","),
         value = str_split(value, ";")) |> 
  unnest_longer(value) |> 
  # Splitting "a, b" yields "a" and " b"; trim the tokens so the same etymon is
  # not counted as two different values and " 0" is caught by the filter below.
  mutate(value = str_trim(value)) |> 
  distinct() |>
  # NOTE(review): "0" presumably marks a missing etymology - confirm.
  filter(value != "0") |> 
  mutate(value2 = 1) |>  
  pivot_wider(names_from = source, values_from = value2, values_fill = 0) |> 
  select(ends_with("etym")) |> 
  rename(Guro = Guro_etym,
         Looma = Looma_etym,
         Mano = Mano_etym,
         Kono = Kono_etym,
         Dan_Gweetaa = Dan_etym,
         Bamana = Bamana_etym,
         Kpelle = Kpelle_etym) |> 
  # Transpose so that languages are the rows compared by dist().
  t() |> 
  dist(method = "binary") |> 
  hclust() |> 
  as.phylo()  %>%
  # `.` is the phylo object; tip colours are looked up in `colors` by language.
  plot(main = "Verb etymology with random equivalents and common stimuli",
       tip.color = colors$color[match(.$tip.label, colors$language)],
       direction = "downwards",
       cex = 1.5, 
       font = 2)

Code
# One random draw: within the stimuli listed in `to_keep`, a single row is
# picked at random for every (number, predicate_eng, source) group, then the
# languages are clustered on the etymons they share. No seed is set in this
# chunk, so each run can give a different tree.
et |> 
  select(number, predicate_eng, stimuli, ends_with("etym")) |> 
  pivot_longer(names_to = "source", 
               values_to = "value", 
               ends_with("etym")) |> 
  na.omit() |>
  filter(number %in% to_keep) |>
  group_by(number, predicate_eng, source) |> 
  slice_sample(n = 1)  |> 
  distinct() |> 
  ungroup() |> 
  # Commas that belong to a gloss ("able, be", "cry, weep", ...) are masked
  # with "|||" so that only the remaining commas are turned into ";" and split
  # on; the mask is converted back to a comma afterwards.
  mutate(value = str_replace_all(value, "able, be", "able||| be"),
         value = str_replace_all(value, "bear, give", "bear||| give"),
         value = str_replace_all(value, ", shut", "||| shut"),
         value = str_replace_all(value, "cry, weep", "cry||| weep"),
         value = str_replace_all(value, "shoot, sting", "shoot||| sting"),
         value = str_replace_all(value, "stuck, get", "stuck||| get"),
         value = str_replace_all(value, ",", ";"),
         value = str_replace_all(value, "\\|\\|\\|", ","),
         value = str_split(value, ";")) |> 
  unnest_longer(value) |> 
  # Splitting "a, b" yields "a" and " b"; trim the tokens so the same etymon is
  # not counted as two different values and " 0" is caught by the filter below.
  mutate(value = str_trim(value)) |> 
  distinct() |>
  # NOTE(review): "0" presumably marks a missing etymology - confirm.
  filter(value != "0") |> 
  mutate(value2 = 1) |>  
  pivot_wider(names_from = source, values_from = value2, values_fill = 0) |> 
  select(ends_with("etym")) |> 
  rename(Guro = Guro_etym,
         Looma = Looma_etym,
         Mano = Mano_etym,
         Kono = Kono_etym,
         Dan_Gweetaa = Dan_etym,
         Bamana = Bamana_etym,
         Kpelle = Kpelle_etym) |> 
  # Transpose so that languages are the rows compared by dist().
  t() |> 
  dist(method = "binary") |> 
  hclust() |> 
  as.phylo()  %>%
  # `.` is the phylo object; tip colours are looked up in `colors` by language.
  plot(main = "Verb etymology with random equivalents and common stimuli",
       tip.color = colors$color[match(.$tip.label, colors$language)],
       direction = "downwards",
       cex = 1.5, 
       font = 2)

Code
# One random draw: within the stimuli listed in `to_keep`, a single row is
# picked at random for every (number, predicate_eng, source) group, then the
# languages are clustered on the etymons they share. No seed is set in this
# chunk, so each run can give a different tree.
et |> 
  select(number, predicate_eng, stimuli, ends_with("etym")) |> 
  pivot_longer(names_to = "source", 
               values_to = "value", 
               ends_with("etym")) |> 
  na.omit() |>
  filter(number %in% to_keep) |>
  group_by(number, predicate_eng, source) |> 
  slice_sample(n = 1)  |> 
  distinct() |> 
  ungroup() |> 
  # Commas that belong to a gloss ("able, be", "cry, weep", ...) are masked
  # with "|||" so that only the remaining commas are turned into ";" and split
  # on; the mask is converted back to a comma afterwards.
  mutate(value = str_replace_all(value, "able, be", "able||| be"),
         value = str_replace_all(value, "bear, give", "bear||| give"),
         value = str_replace_all(value, ", shut", "||| shut"),
         value = str_replace_all(value, "cry, weep", "cry||| weep"),
         value = str_replace_all(value, "shoot, sting", "shoot||| sting"),
         value = str_replace_all(value, "stuck, get", "stuck||| get"),
         value = str_replace_all(value, ",", ";"),
         value = str_replace_all(value, "\\|\\|\\|", ","),
         value = str_split(value, ";")) |> 
  unnest_longer(value) |> 
  # Splitting "a, b" yields "a" and " b"; trim the tokens so the same etymon is
  # not counted as two different values and " 0" is caught by the filter below.
  mutate(value = str_trim(value)) |> 
  distinct() |>
  # NOTE(review): "0" presumably marks a missing etymology - confirm.
  filter(value != "0") |> 
  mutate(value2 = 1) |>  
  pivot_wider(names_from = source, values_from = value2, values_fill = 0) |> 
  select(ends_with("etym")) |> 
  rename(Guro = Guro_etym,
         Looma = Looma_etym,
         Mano = Mano_etym,
         Kono = Kono_etym,
         Dan_Gweetaa = Dan_etym,
         Bamana = Bamana_etym,
         Kpelle = Kpelle_etym) |> 
  # Transpose so that languages are the rows compared by dist().
  t() |> 
  dist(method = "binary") |> 
  hclust() |> 
  as.phylo()  %>%
  # `.` is the phylo object; tip colours are looked up in `colors` by language.
  plot(main = "Verb etymology with random equivalents and common stimuli",
       tip.color = colors$color[match(.$tip.label, colors$language)],
       direction = "downwards",
       cex = 1.5, 
       font = 2)

Code
# One random draw: within the stimuli listed in `to_keep`, a single row is
# picked at random for every (number, predicate_eng, source) group, then the
# languages are clustered on the etymons they share. No seed is set in this
# chunk, so each run can give a different tree.
et |> 
  select(number, predicate_eng, stimuli, ends_with("etym")) |> 
  pivot_longer(names_to = "source", 
               values_to = "value", 
               ends_with("etym")) |> 
  na.omit() |>
  filter(number %in% to_keep) |>
  group_by(number, predicate_eng, source) |> 
  slice_sample(n = 1)  |> 
  distinct() |> 
  ungroup() |> 
  # Commas that belong to a gloss ("able, be", "cry, weep", ...) are masked
  # with "|||" so that only the remaining commas are turned into ";" and split
  # on; the mask is converted back to a comma afterwards.
  mutate(value = str_replace_all(value, "able, be", "able||| be"),
         value = str_replace_all(value, "bear, give", "bear||| give"),
         value = str_replace_all(value, ", shut", "||| shut"),
         value = str_replace_all(value, "cry, weep", "cry||| weep"),
         value = str_replace_all(value, "shoot, sting", "shoot||| sting"),
         value = str_replace_all(value, "stuck, get", "stuck||| get"),
         value = str_replace_all(value, ",", ";"),
         value = str_replace_all(value, "\\|\\|\\|", ","),
         value = str_split(value, ";")) |> 
  unnest_longer(value) |> 
  # Splitting "a, b" yields "a" and " b"; trim the tokens so the same etymon is
  # not counted as two different values and " 0" is caught by the filter below.
  mutate(value = str_trim(value)) |> 
  distinct() |>
  # NOTE(review): "0" presumably marks a missing etymology - confirm.
  filter(value != "0") |> 
  mutate(value2 = 1) |>  
  pivot_wider(names_from = source, values_from = value2, values_fill = 0) |> 
  select(ends_with("etym")) |> 
  rename(Guro = Guro_etym,
         Looma = Looma_etym,
         Mano = Mano_etym,
         Kono = Kono_etym,
         Dan_Gweetaa = Dan_etym,
         Bamana = Bamana_etym,
         Kpelle = Kpelle_etym) |> 
  # Transpose so that languages are the rows compared by dist().
  t() |> 
  dist(method = "binary") |> 
  hclust() |> 
  as.phylo()  %>%
  # `.` is the phylo object; tip colours are looked up in `colors` by language.
  plot(main = "Verb etymology with random equivalents and common stimuli",
       tip.color = colors$color[match(.$tip.label, colors$language)],
       direction = "downwards",
       cex = 1.5, 
       font = 2)

Code
# One random draw: within the stimuli listed in `to_keep`, a single row is
# picked at random for every (number, predicate_eng, source) group, then the
# languages are clustered on the etymons they share. No seed is set in this
# chunk, so each run can give a different tree.
et |> 
  select(number, predicate_eng, stimuli, ends_with("etym")) |> 
  pivot_longer(names_to = "source", 
               values_to = "value", 
               ends_with("etym")) |> 
  na.omit() |>
  filter(number %in% to_keep) |>
  group_by(number, predicate_eng, source) |> 
  slice_sample(n = 1)  |> 
  distinct() |> 
  ungroup() |> 
  # Commas that belong to a gloss ("able, be", "cry, weep", ...) are masked
  # with "|||" so that only the remaining commas are turned into ";" and split
  # on; the mask is converted back to a comma afterwards.
  mutate(value = str_replace_all(value, "able, be", "able||| be"),
         value = str_replace_all(value, "bear, give", "bear||| give"),
         value = str_replace_all(value, ", shut", "||| shut"),
         value = str_replace_all(value, "cry, weep", "cry||| weep"),
         value = str_replace_all(value, "shoot, sting", "shoot||| sting"),
         value = str_replace_all(value, "stuck, get", "stuck||| get"),
         value = str_replace_all(value, ",", ";"),
         value = str_replace_all(value, "\\|\\|\\|", ","),
         value = str_split(value, ";")) |> 
  unnest_longer(value) |> 
  # Splitting "a, b" yields "a" and " b"; trim the tokens so the same etymon is
  # not counted as two different values and " 0" is caught by the filter below.
  mutate(value = str_trim(value)) |> 
  distinct() |>
  # NOTE(review): "0" presumably marks a missing etymology - confirm.
  filter(value != "0") |> 
  mutate(value2 = 1) |>  
  pivot_wider(names_from = source, values_from = value2, values_fill = 0) |> 
  select(ends_with("etym")) |> 
  rename(Guro = Guro_etym,
         Looma = Looma_etym,
         Mano = Mano_etym,
         Kono = Kono_etym,
         Dan_Gweetaa = Dan_etym,
         Bamana = Bamana_etym,
         Kpelle = Kpelle_etym) |> 
  # Transpose so that languages are the rows compared by dist().
  t() |> 
  dist(method = "binary") |> 
  hclust() |> 
  as.phylo()  %>%
  # `.` is the phylo object; tip colours are looked up in `colors` by language.
  plot(main = "Verb etymology with random equivalents and common stimuli",
       tip.color = colors$color[match(.$tip.label, colors$language)],
       direction = "downwards",
       cex = 1.5, 
       font = 2)

Code
# One random draw: within the stimuli listed in `to_keep`, a single row is
# picked at random for every (number, predicate_eng, source) group, then the
# languages are clustered on the etymons they share. No seed is set in this
# chunk, so each run can give a different tree.
et |> 
  select(number, predicate_eng, stimuli, ends_with("etym")) |> 
  pivot_longer(names_to = "source", 
               values_to = "value", 
               ends_with("etym")) |> 
  na.omit() |>
  filter(number %in% to_keep) |>
  group_by(number, predicate_eng, source) |> 
  slice_sample(n = 1)  |> 
  distinct() |> 
  ungroup() |> 
  # Commas that belong to a gloss ("able, be", "cry, weep", ...) are masked
  # with "|||" so that only the remaining commas are turned into ";" and split
  # on; the mask is converted back to a comma afterwards.
  mutate(value = str_replace_all(value, "able, be", "able||| be"),
         value = str_replace_all(value, "bear, give", "bear||| give"),
         value = str_replace_all(value, ", shut", "||| shut"),
         value = str_replace_all(value, "cry, weep", "cry||| weep"),
         value = str_replace_all(value, "shoot, sting", "shoot||| sting"),
         value = str_replace_all(value, "stuck, get", "stuck||| get"),
         value = str_replace_all(value, ",", ";"),
         value = str_replace_all(value, "\\|\\|\\|", ","),
         value = str_split(value, ";")) |> 
  unnest_longer(value) |> 
  # Splitting "a, b" yields "a" and " b"; trim the tokens so the same etymon is
  # not counted as two different values and " 0" is caught by the filter below.
  mutate(value = str_trim(value)) |> 
  distinct() |>
  # NOTE(review): "0" presumably marks a missing etymology - confirm.
  filter(value != "0") |> 
  mutate(value2 = 1) |>  
  pivot_wider(names_from = source, values_from = value2, values_fill = 0) |> 
  select(ends_with("etym")) |> 
  rename(Guro = Guro_etym,
         Looma = Looma_etym,
         Mano = Mano_etym,
         Kono = Kono_etym,
         Dan_Gweetaa = Dan_etym,
         Bamana = Bamana_etym,
         Kpelle = Kpelle_etym) |> 
  # Transpose so that languages are the rows compared by dist().
  t() |> 
  dist(method = "binary") |> 
  hclust() |> 
  as.phylo()  %>%
  # `.` is the phylo object; tip colours are looked up in `colors` by language.
  plot(main = "Verb etymology with random equivalents and common stimuli",
       tip.color = colors$color[match(.$tip.label, colors$language)],
       direction = "downwards",
       cex = 1.5, 
       font = 2)

Code
# One random draw: within the stimuli listed in `to_keep`, a single row is
# picked at random for every (number, predicate_eng, source) group, then the
# languages are clustered on the etymons they share. No seed is set in this
# chunk, so each run can give a different tree.
et |> 
  select(number, predicate_eng, stimuli, ends_with("etym")) |> 
  pivot_longer(names_to = "source", 
               values_to = "value", 
               ends_with("etym")) |> 
  na.omit() |>
  filter(number %in% to_keep) |>
  group_by(number, predicate_eng, source) |> 
  slice_sample(n = 1)  |> 
  distinct() |> 
  ungroup() |> 
  # Commas that belong to a gloss ("able, be", "cry, weep", ...) are masked
  # with "|||" so that only the remaining commas are turned into ";" and split
  # on; the mask is converted back to a comma afterwards.
  mutate(value = str_replace_all(value, "able, be", "able||| be"),
         value = str_replace_all(value, "bear, give", "bear||| give"),
         value = str_replace_all(value, ", shut", "||| shut"),
         value = str_replace_all(value, "cry, weep", "cry||| weep"),
         value = str_replace_all(value, "shoot, sting", "shoot||| sting"),
         value = str_replace_all(value, "stuck, get", "stuck||| get"),
         value = str_replace_all(value, ",", ";"),
         value = str_replace_all(value, "\\|\\|\\|", ","),
         value = str_split(value, ";")) |> 
  unnest_longer(value) |> 
  # Splitting "a, b" yields "a" and " b"; trim the tokens so the same etymon is
  # not counted as two different values and " 0" is caught by the filter below.
  mutate(value = str_trim(value)) |> 
  distinct() |>
  # NOTE(review): "0" presumably marks a missing etymology - confirm.
  filter(value != "0") |> 
  mutate(value2 = 1) |>  
  pivot_wider(names_from = source, values_from = value2, values_fill = 0) |> 
  select(ends_with("etym")) |> 
  rename(Guro = Guro_etym,
         Looma = Looma_etym,
         Mano = Mano_etym,
         Kono = Kono_etym,
         Dan_Gweetaa = Dan_etym,
         Bamana = Bamana_etym,
         Kpelle = Kpelle_etym) |> 
  # Transpose so that languages are the rows compared by dist().
  t() |> 
  dist(method = "binary") |> 
  hclust() |> 
  as.phylo()  %>%
  # `.` is the phylo object; tip colours are looked up in `colors` by language.
  plot(main = "Verb etymology with random equivalents and common stimuli",
       tip.color = colors$color[match(.$tip.label, colors$language)],
       direction = "downwards",
       cex = 1.5, 
       font = 2)

Code
# One random draw: within the stimuli listed in `to_keep`, a single row is
# picked at random for every (number, predicate_eng, source) group, then the
# languages are clustered on the etymons they share. No seed is set in this
# chunk, so each run can give a different tree.
et |> 
  select(number, predicate_eng, stimuli, ends_with("etym")) |> 
  pivot_longer(names_to = "source", 
               values_to = "value", 
               ends_with("etym")) |> 
  na.omit() |>
  filter(number %in% to_keep) |>
  group_by(number, predicate_eng, source) |> 
  slice_sample(n = 1)  |> 
  distinct() |> 
  ungroup() |> 
  # Commas that belong to a gloss ("able, be", "cry, weep", ...) are masked
  # with "|||" so that only the remaining commas are turned into ";" and split
  # on; the mask is converted back to a comma afterwards.
  mutate(value = str_replace_all(value, "able, be", "able||| be"),
         value = str_replace_all(value, "bear, give", "bear||| give"),
         value = str_replace_all(value, ", shut", "||| shut"),
         value = str_replace_all(value, "cry, weep", "cry||| weep"),
         value = str_replace_all(value, "shoot, sting", "shoot||| sting"),
         value = str_replace_all(value, "stuck, get", "stuck||| get"),
         value = str_replace_all(value, ",", ";"),
         value = str_replace_all(value, "\\|\\|\\|", ","),
         value = str_split(value, ";")) |> 
  unnest_longer(value) |> 
  # Splitting "a, b" yields "a" and " b"; trim the tokens so the same etymon is
  # not counted as two different values and " 0" is caught by the filter below.
  mutate(value = str_trim(value)) |> 
  distinct() |>
  # NOTE(review): "0" presumably marks a missing etymology - confirm.
  filter(value != "0") |> 
  mutate(value2 = 1) |>  
  pivot_wider(names_from = source, values_from = value2, values_fill = 0) |> 
  select(ends_with("etym")) |> 
  rename(Guro = Guro_etym,
         Looma = Looma_etym,
         Mano = Mano_etym,
         Kono = Kono_etym,
         Dan_Gweetaa = Dan_etym,
         Bamana = Bamana_etym,
         Kpelle = Kpelle_etym) |> 
  # Transpose so that languages are the rows compared by dist().
  t() |> 
  dist(method = "binary") |> 
  hclust() |> 
  as.phylo()  %>%
  # `.` is the phylo object; tip colours are looked up in `colors` by language.
  plot(main = "Verb etymology with random equivalents and common stimuli",
       tip.color = colors$color[match(.$tip.label, colors$language)],
       direction = "downwards",
       cex = 1.5, 
       font = 2)

Code
# One random draw: within the stimuli listed in `to_keep`, a single row is
# picked at random for every (number, predicate_eng, source) group, then the
# languages are clustered on the etymons they share. No seed is set in this
# chunk, so each run can give a different tree.
et |> 
  select(number, predicate_eng, stimuli, ends_with("etym")) |> 
  pivot_longer(names_to = "source", 
               values_to = "value", 
               ends_with("etym")) |> 
  na.omit() |>
  filter(number %in% to_keep) |>
  group_by(number, predicate_eng, source) |> 
  slice_sample(n = 1)  |> 
  distinct() |> 
  ungroup() |> 
  # Commas that belong to a gloss ("able, be", "cry, weep", ...) are masked
  # with "|||" so that only the remaining commas are turned into ";" and split
  # on; the mask is converted back to a comma afterwards.
  mutate(value = str_replace_all(value, "able, be", "able||| be"),
         value = str_replace_all(value, "bear, give", "bear||| give"),
         value = str_replace_all(value, ", shut", "||| shut"),
         value = str_replace_all(value, "cry, weep", "cry||| weep"),
         value = str_replace_all(value, "shoot, sting", "shoot||| sting"),
         value = str_replace_all(value, "stuck, get", "stuck||| get"),
         value = str_replace_all(value, ",", ";"),
         value = str_replace_all(value, "\\|\\|\\|", ","),
         value = str_split(value, ";")) |> 
  unnest_longer(value) |> 
  # Splitting "a, b" yields "a" and " b"; trim the tokens so the same etymon is
  # not counted as two different values and " 0" is caught by the filter below.
  mutate(value = str_trim(value)) |> 
  distinct() |>
  # NOTE(review): "0" presumably marks a missing etymology - confirm.
  filter(value != "0") |> 
  mutate(value2 = 1) |>  
  pivot_wider(names_from = source, values_from = value2, values_fill = 0) |> 
  select(ends_with("etym")) |> 
  rename(Guro = Guro_etym,
         Looma = Looma_etym,
         Mano = Mano_etym,
         Kono = Kono_etym,
         Dan_Gweetaa = Dan_etym,
         Bamana = Bamana_etym,
         Kpelle = Kpelle_etym) |> 
  # Transpose so that languages are the rows compared by dist().
  t() |> 
  dist(method = "binary") |> 
  hclust() |> 
  as.phylo()  %>%
  # `.` is the phylo object; tip colours are looked up in `colors` by language.
  plot(main = "Verb etymology with random equivalents and common stimuli",
       tip.color = colors$color[match(.$tip.label, colors$language)],
       direction = "downwards",
       cex = 1.5, 
       font = 2)

Code
# One random draw: within the stimuli listed in `to_keep`, a single row is
# picked at random for every (number, predicate_eng, source) group, then the
# languages are clustered on the etymons they share. No seed is set in this
# chunk, so each run can give a different tree.
et |> 
  select(number, predicate_eng, stimuli, ends_with("etym")) |> 
  pivot_longer(names_to = "source", 
               values_to = "value", 
               ends_with("etym")) |> 
  na.omit() |>
  filter(number %in% to_keep) |>
  group_by(number, predicate_eng, source) |> 
  slice_sample(n = 1)  |> 
  distinct() |> 
  ungroup() |> 
  # Commas that belong to a gloss ("able, be", "cry, weep", ...) are masked
  # with "|||" so that only the remaining commas are turned into ";" and split
  # on; the mask is converted back to a comma afterwards.
  mutate(value = str_replace_all(value, "able, be", "able||| be"),
         value = str_replace_all(value, "bear, give", "bear||| give"),
         value = str_replace_all(value, ", shut", "||| shut"),
         value = str_replace_all(value, "cry, weep", "cry||| weep"),
         value = str_replace_all(value, "shoot, sting", "shoot||| sting"),
         value = str_replace_all(value, "stuck, get", "stuck||| get"),
         value = str_replace_all(value, ",", ";"),
         value = str_replace_all(value, "\\|\\|\\|", ","),
         value = str_split(value, ";")) |> 
  unnest_longer(value) |> 
  # Splitting "a, b" yields "a" and " b"; trim the tokens so the same etymon is
  # not counted as two different values and " 0" is caught by the filter below.
  mutate(value = str_trim(value)) |> 
  distinct() |>
  # NOTE(review): "0" presumably marks a missing etymology - confirm.
  filter(value != "0") |> 
  mutate(value2 = 1) |>  
  pivot_wider(names_from = source, values_from = value2, values_fill = 0) |> 
  select(ends_with("etym")) |> 
  rename(Guro = Guro_etym,
         Looma = Looma_etym,
         Mano = Mano_etym,
         Kono = Kono_etym,
         Dan_Gweetaa = Dan_etym,
         Bamana = Bamana_etym,
         Kpelle = Kpelle_etym) |> 
  # Transpose so that languages are the rows compared by dist().
  t() |> 
  dist(method = "binary") |> 
  hclust() |> 
  as.phylo()  %>%
  # `.` is the phylo object; tip colours are looked up in `colors` by language.
  plot(main = "Verb etymology with random equivalents and common stimuli",
       tip.color = colors$color[match(.$tip.label, colors$language)],
       direction = "downwards",
       cex = 1.5, 
       font = 2)

Code
# One random draw: within the stimuli listed in `to_keep`, a single row is
# picked at random for every (number, predicate_eng, source) group, then the
# languages are clustered on the etymons they share. No seed is set in this
# chunk, so each run can give a different tree.
et |> 
  select(number, predicate_eng, stimuli, ends_with("etym")) |> 
  pivot_longer(names_to = "source", 
               values_to = "value", 
               ends_with("etym")) |> 
  na.omit() |>
  filter(number %in% to_keep) |>
  group_by(number, predicate_eng, source) |> 
  slice_sample(n = 1)  |> 
  distinct() |> 
  ungroup() |> 
  # Commas that belong to a gloss ("able, be", "cry, weep", ...) are masked
  # with "|||" so that only the remaining commas are turned into ";" and split
  # on; the mask is converted back to a comma afterwards.
  mutate(value = str_replace_all(value, "able, be", "able||| be"),
         value = str_replace_all(value, "bear, give", "bear||| give"),
         value = str_replace_all(value, ", shut", "||| shut"),
         value = str_replace_all(value, "cry, weep", "cry||| weep"),
         value = str_replace_all(value, "shoot, sting", "shoot||| sting"),
         value = str_replace_all(value, "stuck, get", "stuck||| get"),
         value = str_replace_all(value, ",", ";"),
         value = str_replace_all(value, "\\|\\|\\|", ","),
         value = str_split(value, ";")) |> 
  unnest_longer(value) |> 
  # Splitting "a, b" yields "a" and " b"; trim the tokens so the same etymon is
  # not counted as two different values and " 0" is caught by the filter below.
  mutate(value = str_trim(value)) |> 
  distinct() |>
  # NOTE(review): "0" presumably marks a missing etymology - confirm.
  filter(value != "0") |> 
  mutate(value2 = 1) |>  
  pivot_wider(names_from = source, values_from = value2, values_fill = 0) |> 
  select(ends_with("etym")) |> 
  rename(Guro = Guro_etym,
         Looma = Looma_etym,
         Mano = Mano_etym,
         Kono = Kono_etym,
         Dan_Gweetaa = Dan_etym,
         Bamana = Bamana_etym,
         Kpelle = Kpelle_etym) |> 
  # Transpose so that languages are the rows compared by dist().
  t() |> 
  dist(method = "binary") |> 
  hclust() |> 
  as.phylo()  %>%
  # `.` is the phylo object; tip colours are looked up in `colors` by language.
  plot(main = "Verb etymology with random equivalents and common stimuli",
       tip.color = colors$color[match(.$tip.label, colors$language)],
       direction = "downwards",
       cex = 1.5, 
       font = 2)

Code
# One random draw: within the stimuli listed in `to_keep`, a single row is
# picked at random for every (number, predicate_eng, source) group, then the
# languages are clustered on the etymons they share. No seed is set in this
# chunk, so each run can give a different tree.
et |> 
  select(number, predicate_eng, stimuli, ends_with("etym")) |> 
  pivot_longer(names_to = "source", 
               values_to = "value", 
               ends_with("etym")) |> 
  na.omit() |>
  filter(number %in% to_keep) |>
  group_by(number, predicate_eng, source) |> 
  slice_sample(n = 1)  |> 
  distinct() |> 
  ungroup() |> 
  # Commas that belong to a gloss ("able, be", "cry, weep", ...) are masked
  # with "|||" so that only the remaining commas are turned into ";" and split
  # on; the mask is converted back to a comma afterwards.
  mutate(value = str_replace_all(value, "able, be", "able||| be"),
         value = str_replace_all(value, "bear, give", "bear||| give"),
         value = str_replace_all(value, ", shut", "||| shut"),
         value = str_replace_all(value, "cry, weep", "cry||| weep"),
         value = str_replace_all(value, "shoot, sting", "shoot||| sting"),
         value = str_replace_all(value, "stuck, get", "stuck||| get"),
         value = str_replace_all(value, ",", ";"),
         value = str_replace_all(value, "\\|\\|\\|", ","),
         value = str_split(value, ";")) |> 
  unnest_longer(value) |> 
  # Splitting "a, b" yields "a" and " b"; trim the tokens so the same etymon is
  # not counted as two different values and " 0" is caught by the filter below.
  mutate(value = str_trim(value)) |> 
  distinct() |>
  # NOTE(review): "0" presumably marks a missing etymology - confirm.
  filter(value != "0") |> 
  mutate(value2 = 1) |>  
  pivot_wider(names_from = source, values_from = value2, values_fill = 0) |> 
  select(ends_with("etym")) |> 
  rename(Guro = Guro_etym,
         Looma = Looma_etym,
         Mano = Mano_etym,
         Kono = Kono_etym,
         Dan_Gweetaa = Dan_etym,
         Bamana = Bamana_etym,
         Kpelle = Kpelle_etym) |> 
  # Transpose so that languages are the rows compared by dist().
  t() |> 
  dist(method = "binary") |> 
  hclust() |> 
  as.phylo()  %>%
  # `.` is the phylo object; tip colours are looked up in `colors` by language.
  plot(main = "Verb etymology with random equivalents and common stimuli",
       tip.color = colors$color[match(.$tip.label, colors$language)],
       direction = "downwards",
       cex = 1.5, 
       font = 2)

Code
# One random draw: within the stimuli listed in `to_keep`, a single row is
# picked at random for every (number, predicate_eng, source) group, then the
# languages are clustered on the etymons they share. No seed is set in this
# chunk, so each run can give a different tree.
et |> 
  select(number, predicate_eng, stimuli, ends_with("etym")) |> 
  pivot_longer(names_to = "source", 
               values_to = "value", 
               ends_with("etym")) |> 
  na.omit() |>
  filter(number %in% to_keep) |>
  group_by(number, predicate_eng, source) |> 
  slice_sample(n = 1)  |> 
  distinct() |> 
  ungroup() |> 
  # Commas that belong to a gloss ("able, be", "cry, weep", ...) are masked
  # with "|||" so that only the remaining commas are turned into ";" and split
  # on; the mask is converted back to a comma afterwards.
  mutate(value = str_replace_all(value, "able, be", "able||| be"),
         value = str_replace_all(value, "bear, give", "bear||| give"),
         value = str_replace_all(value, ", shut", "||| shut"),
         value = str_replace_all(value, "cry, weep", "cry||| weep"),
         value = str_replace_all(value, "shoot, sting", "shoot||| sting"),
         value = str_replace_all(value, "stuck, get", "stuck||| get"),
         value = str_replace_all(value, ",", ";"),
         value = str_replace_all(value, "\\|\\|\\|", ","),
         value = str_split(value, ";")) |> 
  unnest_longer(value) |> 
  # Splitting "a, b" yields "a" and " b"; trim the tokens so the same etymon is
  # not counted as two different values and " 0" is caught by the filter below.
  mutate(value = str_trim(value)) |> 
  distinct() |>
  # NOTE(review): "0" presumably marks a missing etymology - confirm.
  filter(value != "0") |> 
  mutate(value2 = 1) |>  
  pivot_wider(names_from = source, values_from = value2, values_fill = 0) |> 
  select(ends_with("etym")) |> 
  rename(Guro = Guro_etym,
         Looma = Looma_etym,
         Mano = Mano_etym,
         Kono = Kono_etym,
         Dan_Gweetaa = Dan_etym,
         Bamana = Bamana_etym,
         Kpelle = Kpelle_etym) |> 
  # Transpose so that languages are the rows compared by dist().
  t() |> 
  dist(method = "binary") |> 
  hclust() |> 
  as.phylo()  %>%
  # `.` is the phylo object; tip colours are looked up in `colors` by language.
  plot(main = "Verb etymology with random equivalents and common stimuli",
       tip.color = colors$color[match(.$tip.label, colors$language)],
       direction = "downwards",
       cex = 1.5, 
       font = 2)

Только стимулы, для которых есть заполненная этимология

Если посмотреть только на те случаи, где заполнена этимология, то останется 21 пример.

Code
# Dendrogram built only from rows of `et` in which every selected column
# (including all *_etym columns) is non-missing, further restricted to the
# stimuli listed in `to_keep`.
et |> 
  select(number, predicate_eng, stimuli, ends_with("etym")) |> 
  # na.omit() runs on the wide table, so a row is dropped as soon as any
  # language lacks an etymology.
  na.omit() |> 
  pivot_longer(names_to = "source", 
               values_to = "value", 
               ends_with("etym")) |> 
  filter(number %in% to_keep) |> 
  distinct() |> 
  # Commas that belong to a gloss ("able, be", "cry, weep", ...) are masked
  # with "|||" so that only the remaining commas are turned into ";" and split
  # on; the mask is converted back to a comma afterwards.
  mutate(value = str_replace_all(value, "able, be", "able||| be"),
         value = str_replace_all(value, "bear, give", "bear||| give"),
         value = str_replace_all(value, ", shut", "||| shut"),
         value = str_replace_all(value, "cry, weep", "cry||| weep"),
         value = str_replace_all(value, "shoot, sting", "shoot||| sting"),
         value = str_replace_all(value, "stuck, get", "stuck||| get"),
         value = str_replace_all(value, ",", ";"),
         value = str_replace_all(value, "\\|\\|\\|", ","),
         value = str_split(value, ";")) |> 
  unnest_longer(value) |> 
  # Splitting "a, b" yields "a" and " b"; trim the tokens so the same etymon is
  # not counted as two different values and " 0" is caught by the filter below.
  mutate(value = str_trim(value)) |> 
  distinct() |> 
  # NOTE(review): "0" presumably marks a missing etymology - confirm.
  filter(value != "0") |> 
  mutate(value2 = 1) |>  
  pivot_wider(names_from = source, values_from = value2, values_fill = 0) |> 
  select(ends_with("etym")) |> 
  rename(Guro = Guro_etym,
         Looma = Looma_etym,
         Mano = Mano_etym,
         Kono = Kono_etym,
         Dan_Gweetaa = Dan_etym,
         Bamana = Bamana_etym,
         Kpelle = Kpelle_etym) |> 
  # Transpose so that languages are the rows compared by dist().
  t() |> 
  dist(method = "binary") |> 
  hclust() |> 
  as.phylo()  %>%
  # `.` is the phylo object; tip colours are looked up in `colors` by language.
  plot(main = "Verb etymology with common stimuli",
       tip.color = colors$color[match(.$tip.label, colors$language)],
       direction = "downwards",
       cex = 1.5, 
       font = 2)

Все этимологии:

Code
# Load the wide etymology table and compute `etym`, the binary distance
# between languages over all stimuli (no `to_keep` filter here).
et <- readxl::read_xlsx("../GM_merged_wide_verb_MK_0927.xlsx")
et |> 
  select(number, predicate_eng, stimuli, ends_with("etym")) |> 
  pivot_longer(names_to = "source", 
               values_to = "value", 
               ends_with("etym")) |> 
  na.omit() |>
  distinct() |> 
  # Commas that belong to a gloss ("able, be", "cry, weep", ...) are masked
  # with "|||" so that only the remaining commas are turned into ";" and split
  # on; the mask is converted back to a comma afterwards.
  mutate(value = str_replace_all(value, "able, be", "able||| be"),
         value = str_replace_all(value, "bear, give", "bear||| give"),
         value = str_replace_all(value, ", shut", "||| shut"),
         value = str_replace_all(value, "cry, weep", "cry||| weep"),
         value = str_replace_all(value, "shoot, sting", "shoot||| sting"),
         value = str_replace_all(value, "stuck, get", "stuck||| get"),
         value = str_replace_all(value, ",", ";"),
         value = str_replace_all(value, "\\|\\|\\|", ","),
         value = str_split(value, ";")) |> 
  unnest_longer(value) |> 
  # Splitting "a, b" yields "a" and " b"; trim the tokens so the same etymon is
  # not counted as two different values.
  mutate(value = str_trim(value)) |> 
  # The "0" filter runs after the split (it used to run before it), so "0"
  # tokens inside multi-valued cells no longer count as a shared etymon.
  # NOTE(review): "0" presumably marks a missing etymology - confirm.
  filter(value != "0") |> 
  distinct() |> 
  mutate(value2 = 1) |>  
  pivot_wider(names_from = source, values_from = value2, values_fill = 0) |> 
  select(ends_with("etym")) |> 
  rename(Guro = Guro_etym,
         Looma = Looma_etym,
         Mano = Mano_etym,
         Kono = Kono_etym,
         Dan_Gweetaa = Dan_etym,
         Bamana = Bamana_etym,
         Kpelle = Kpelle_etym) |> 
  # Transpose so that languages are the rows compared by dist().
  t() |> 
  dist(method = "binary") ->
  etym

# Hierarchical clustering of the `etym` distance object, drawn as a
# downward-pointing tree; each tip is coloured by looking its label up in
# `colors$language`.
etym |>
  hclust() |>
  as.phylo() |>
  (\(tree) plot(tree,
                font = 2,
                cex = 1.5,
                direction = "downwards",
                tip.color = colors$color[match(tree$tip.label, colors$language)],
                main = "Verb etymology (all stimuli)"))()

Code
# Neighbor-net drawn from the `etym` distance object; the title is added
# afterwards with title().
plot(neighborNet(etym))
title(main = "Verb etymology (all stimuli)")

Кластеризация со случайным эквивалентом. Я запустил более 10 раз – ничего в структуре не поменялось, только длина ножек менялась.

Code
# Compute `etym_random`: the binary distance between languages when a single
# etymon is drawn at random for every (number, source) pair. No seed is set in
# this chunk, so each run gives a different draw.
et |> 
  select(number, predicate_eng, stimuli, ends_with("etym")) |> 
  pivot_longer(names_to = "source", 
               values_to = "value", 
               ends_with("etym")) |> 
  na.omit() |> 
  distinct() |> 
  # Commas that belong to a gloss ("able, be", "cry, weep", ...) are masked
  # with "|||" so that only the remaining commas are turned into ";" and split
  # on; the mask is converted back to a comma afterwards.
  mutate(value = str_replace_all(value, "able, be", "able||| be"),
         value = str_replace_all(value, "bear, give", "bear||| give"),
         value = str_replace_all(value, ", shut", "||| shut"),
         value = str_replace_all(value, "cry, weep", "cry||| weep"),
         value = str_replace_all(value, "shoot, sting", "shoot||| sting"),
         value = str_replace_all(value, "stuck, get", "stuck||| get"),
         value = str_replace_all(value, ",", ";"),
         value = str_replace_all(value, "\\|\\|\\|", ","),
         value = str_split(value, ";")) |> 
  unnest_longer(value) |> 
  # Splitting "a, b" yields "a" and " b"; trim the tokens so the same etymon is
  # not counted as two different values.
  mutate(value = str_trim(value)) |> 
  # The "0" filter runs after the split (it used to run before it), so a "0"
  # token inside a multi-valued cell can no longer be drawn as an etymon.
  # NOTE(review): "0" presumably marks a missing etymology - confirm.
  filter(value != "0") |> 
  distinct() |> 
  group_by(number, source) |> 
  # slice_sample() replaces the superseded sample_n().
  slice_sample(n = 1) |> 
  ungroup() |> 
  mutate(value2 = 1) |>  
  pivot_wider(names_from = source, values_from = value2, values_fill = 0) |> 
  select(ends_with("etym")) |> 
  rename(Guro = Guro_etym,
         Looma = Looma_etym,
         Kono = Kono_etym,
         Mano = Mano_etym,
         Dan_Gweetaa = Dan_etym,
         Bamana = Bamana_etym,
         Kpelle = Kpelle_etym) |> 
  # Transpose so that languages are the rows compared by dist().
  t() |> 
  dist(method = "binary") ->
  etym_random

# Hierarchical clustering of the `etym_random` distance object, drawn as a
# downward-pointing tree; each tip is coloured by looking its label up in
# `colors$language`.
# NOTE(review): the title says "common stimuli", but the pipeline that builds
# `etym_random` does not filter on `to_keep` - confirm the wording.
etym_random |>
  hclust() |>
  as.phylo() |>
  (\(tree) plot(tree,
                font = 2,
                cex = 1.5,
                direction = "downwards",
                tip.color = colors$color[match(tree$tip.label, colors$language)],
                main = "Verb etymology with random equivalents and common stimuli"))()

Code
# Neighbor-net drawn from the `etym_random` distance object; the title is
# added afterwards with title().
# NOTE(review): the title says "common stimuli", but the pipeline that builds
# `etym_random` does not filter on `to_keep` - confirm the wording.
plot(neighborNet(etym_random))
title(main = "Verb etymology with random equivalents and common stimuli")

Это сделано по следующей таблице:

Code
# Print the table behind the clustering: one row per stimulus and etymon, one
# column per language, with the etymon repeated in the columns of the
# languages that have it and "" elsewhere.
et |> 
  select(number, predicate_eng, ends_with("etym")) |> 
  pivot_longer(names_to = "source", 
               values_to = "value", 
               ends_with("etym")) |> 
  na.omit() |> 
  distinct() |> 
  # Commas that belong to a gloss ("able, be", "cry, weep", ...) are masked
  # with "|||" so that only the remaining commas are turned into ";" and split
  # on; the mask is converted back to a comma afterwards.
  mutate(value = str_replace_all(value, "able, be", "able||| be"),
         value = str_replace_all(value, "bear, give", "bear||| give"),
         value = str_replace_all(value, ", shut", "||| shut"),
         value = str_replace_all(value, "cry, weep", "cry||| weep"),
         value = str_replace_all(value, "shoot, sting", "shoot||| sting"),
         value = str_replace_all(value, "stuck, get", "stuck||| get"),
         value = str_replace_all(value, ",", ";"),
         value = str_replace_all(value, "\\|\\|\\|", ","),
         value = str_split(value, ";")) |> 
  unnest_longer(value) |> 
  # Splitting "a, b" yields "a" and " b"; trim the tokens so the same etymon
  # does not end up in two separate rows of the table.
  mutate(value = str_trim(value)) |> 
  # The "0" filter runs after the split (it used to run before it), so "0"
  # tokens inside multi-valued cells are removed as well.
  # NOTE(review): "0" presumably marks a missing etymology - confirm.
  filter(value != "0") |> 
  distinct() |> 
  mutate(value2 = value) |> 
  pivot_wider(names_from = source, values_from = value2, values_fill = "") |>
  arrange(number, predicate_eng, value) |> 
  # `value` only served as the row key and sort key.
  select(-value)
number predicate_eng Guro_etym Looma_etym Mano_etym Dan_etym Bamana_etym Kpelle_etym Kono_etym
1 hurt break (to)-2-wi
1 hurt do-1-ke
1 hurt go out (to)-bo
1 hurt hurt-xona hurt-xona hurt-xona
1 hurt illness-1-jankaro
1 hurt pain-1-dimi
1 hurt sufferance-4-waa
1 hurt turn-pene
2 be sick with catch-2-miita
2 be sick with do-1-ke
3 be afraid of fear-1-g’ila fear-1-g’ila
3 be afraid of fear-3-duwa fear-3-duwa
3 be afraid of fear-4-gaaxu fear-4-gaaxu
4 throw throw-1 (to)-fili throw-1 (to)-fili throw-1 (to)-fili throw-1 (to)-fili throw-1 (to)-fili
4 throw throw-2 (to)-zu throw-2 (to)-zu
5 be sufficient to catch-3-catch
5 be sufficient to go out (to)-bo
6 be like sb go out (to)-bo go out (to)-bo go out (to)-bo go out (to)-bo
6 be like sb knock down-kula knock down-kula knock down-kula
7 have trust confidence-dannaya
7 have trust go out (to)-bo
7 have trust lie down-1-d’a lie down-1-d’a lie down-1-d’a lie down-1-d’a
7 have trust send-2-bo
7 have trust stand (to)-1-d’o stand (to)-1-d’o
8 take take-2-si take-2-si take-2-si
8 take take-3-ta
8 take take-5-sige take-5-sige take-5-sige
9 see what look-2 (to)-ga
9 see what see (to)-1-ye see (to)-1-ye see (to)-1-ye
9 see what see (to)-3-ka see (to)-3-ka see (to)-3-ka
10 influence press-1 (to)-digi
10 influence put-2-kpa
11 meet go out (to)-bo
11 meet meet-ben
11 meet see (to)-1-ye see (to)-1-ye see (to)-1-ye
11 meet see (to)-3-ka
12 enter send-2-bo
12 enter throw-2 (to)-zu
12 enter enter (to)-1 enter (to)-1
12 enter enter (to)-1-so
12 enter enter (to)-2-dzon
12 enter enter (to)-3-wola enter (to)-3-wola
12 enter lie down-1-d’a
13 win able, be-1-mo
13 win beat-1-bugo
13 win do-1-ke
13 win prepare (to)-baa
13 win receive-sodon
13 win rise (to)-1-te
13 win take-2-si
14 leave take-5-sige
14 leave go away (to)-go
14 leave go out (to)-bo go out (to)-bo go out (to)-bo
14 leave knock down-kula knock down-kula knock down-kula
15 chase chase (to)-kpe chase (to)-kpe
15 chase come-3-nu
15 chase go (to)-1-di go (to)-1-di
15 chase go away (to)-go
16 bend bend-4-kula
16 bend bend-1-bidin
16 bend bend-3-kpilin bend-3-kpilin bend-3-kpilin bend-3-kpilin
16 bend bend-4-kula
17 say say-1-fo say-1-fo say-1-fo
17 say say-5-gee
17 say say-6-yee say-6-yee say-6-yee
18 hold catch-3-kun
18 hold catch-4-song
18 hold hold-mara
19 catch catch-3-kun catch-3-kun catch-3-kun
19 catch catch-4-song catch-4-song catch-4-song
19 catch stand (to)-1-do
20 to milk go out (to)-bo
20 to milk knock down-kula knock down-kula knock down-kula
20 to milk milk (to)-bidi
20 to milk take-2-si
21 reach arrive-1-k’e
21 reach arrive-4-kite arrive-4-kite
21 reach go out (to)-bo go out (to)-bo go out (to)-bo
22 touch 0
22 touch touch (to)-4-tungbong touch (to)-4-tungbong
22 touch 0
22 touch sit (to)-1-sigi
22 touch touch (to)-1-maga
22 touch touch (to)-2-pa touch (to)-2-pa
22 touch touch (to)-4-tungbong touch (to)-4-tungbong
23 fight sb do-4-goon do-4-goon do-4-goon do-4-goon do-4-goon
23 fight sb measure (to)-2-dan
23 fight sb war-1-kele
24 be friends with do-1-ke
24 be friends with lie down-1-d’a
25 think about sb come-3-nu
25 think about sb do-1-ke
25 think about sb think (to)-1-miira
25 think about sb walk-1-sigan walk-1-sigan walk-1-sigan
25 think about sb walk-3-taga
26 eat drink (to)-1-min drink (to)-1-min drink (to)-1-min
26 eat eat (to)-1-don/domu
26 eat eat (to)-2-bele eat (to)-2-bele eat (to)-2-bele
27 fry fry (to)-geran fry (to)-geran fry (to)-geran fry (to)-geran fry (to)-geran fry (to)-geran
28 wait help-2-kpong
28 wait remain-to
28 wait stand (to)-1-d’o stand (to)-1-d’o
28 wait wait-1-kono
28 wait wait-2-gben wait-2-gben wait-2-gben wait-2-gben
29 forget forget-2-nemu
29 forget forget-1-nyina forget-1-nyina
29 forget forget-2-nemu forget-2-nemu forget-2-nemu
29 forget go out (to)-bo
29 forget loose (to)-1-sama
30 depend on take-2-si take-2-si
31 call call-2-wele
31 call call-3-dhee call-3-dhee
31 call call-5-toli call-5-toli call-5-toli
31 call do-1-ke
32 meet, faire connaissance know (to)-1-d’on know (to)-1-d’on know (to)-1-d’on know (to)-1-d’on
32 meet, faire connaissance know (to)-2-kolon know (to)-2-kolon know (to)-2-kolon
33 know someone know (to)-1-d’on know (to)-1-d’on know (to)-1-d’on know (to)-1-d’on
33 know someone know (to)-2-kolon know (to)-2-kolon know (to)-2-kolon
34 play blow-1-fe blow-1-fe
34 play kill-2-je
34 play forge (to)-2-galin forge (to)-2-galin
34 play kill-2-je kill-2-je
34 play say-1-fo
34 play strike-2-ma
35 avoid go out (to)-bo
35 avoid lie down-1-d’a lie down-1-d’a
35 avoid put-2-kpa
35 avoid spread (to)-2-kpo spread (to)-2-kpo
36 fabricate 0
36 fabricate do-1-ke do-1-ke do-1-ke
36 fabricate make-kpeteng make-kpeteng
36 fabricate prepare (to)-baa
36 fabricate weave (to)-1-dan
37 mock stand (to)-1-d’o
37 mock tear (to)-pera
37 mock go out (to)-bo
37 mock knock down-kula
37 mock laugh-1-jele laugh-1-jele laugh-1-jele
37 mock stand (to)-1-d’o
39 seek spread (to)-2-kpo
39 seek search-1-nyini
39 seek search-3-gini
39 seek search-4-koli
39 seek walk-1-sigan
39 seek walk-3-taga
40 paint draw (a picture)-nyege
40 paint pass-2-gile
40 paint rise (to)-1-te
40 paint whiten-fele
40 paint write-2-been
41 bite bite (to)-kin bite (to)-kin bite (to)-kin bite (to)-kin
41 bite catch-3-kun
41 bite stand (to)-1-d’o stand (to)-1-d’o
42 be deprived fall (to)-2-dia
42 be deprived fail-5-kon
42 be deprived fail-6-fenge fail-6-fenge
42 be deprived forget-1-nyina
42 be deprived loose (to)-1-sama
42 be deprived misfortune-1-bono
43 catch chase (to)-kpe
43 catch catch-2-miita
43 catch catch-3-kun catch-3-kun catch-3-kun
43 catch catch-4-song catch-4-song catch-4-song
44 break break (to)-1-gali
44 break break (to)-1-gali break (to)-1-gali break (to)-1-gali break (to)-1-gali break (to)-1-gali break (to)-1-gali
44 break break (to)-2-wi
45 flatter do-1-ke
45 flatter rise (to)-1-te rise (to)-1-te rise (to)-1-te
46 love do-1-ke
46 love go out (to)-bo
46 love do-1-ke do-1-ke
46 love go out (to)-bo
46 love love-xani
47 wave go out (to)-bo
47 wave shake-6-yuguyugu
48 dream go out (to)-bo go out (to)-bo
48 dream kill-2-je
49 wash wash-1-poli
49 wash wash-2-zulu wash-2-zulu
49 wash wash-3-ko wash-3-ko wash-3-ko wash-3-ko
50 put on do-5-wo
50 put on enter (to)-1-so enter (to)-1-so
50 put on enter (to)-2-dzon
50 put on lie down-1-d’a
50 put on pour-2 (to)-pu
51 call smth call-3-dhee call-3-dhee
51 call smth do-1-ke
51 call smth say-5-gee
51 call smth speak-1-xo
52 punish lie down-1-d’a
52 punish lie down-3-wo
52 punish punish-nyangi
52 punish stand (to)-1-d’o
53 attack lie down-1-d’a
53 attack venom-baga
53 attack break (to)-3-golo break (to)-3-golo
53 attack catch-4-song
53 attack fall (to)-1-bele
53 attack fall (to)-3-pele
53 attack rush-3-gbidi
53 attack sit (to)-2-yaga
53 attack venom-baga
54 be filled with smth fill (to)-1-pa fill (to)-1-pa fill (to)-1-pa fill (to)-1-pa fill (to)-1-pa fill (to)-1-pa fill (to)-1-pa
55 find smth see (to)-1-ye see (to)-1-ye see (to)-1-ye
55 find smth see (to)-3-ka see (to)-3-ka see (to)-3-ka
56 lack go out (to)-bo
57 hate, detest boil-3-fili
57 hate, detest do-1-ke
57 hate, detest end-6-kpe
57 hate, detest hate-xoni
57 hate, detest take-5-sige
58 like lie down-1-d’a
58 like catch-3-kun
58 like catch-4-song
58 like do-1-ke
58 like good-2
58 like good-2-di
58 like please (to)-so
58 like tasty-nene
59 need catch-3-kun
60 surround encircle-2-kooli
60 surround encircle-1-minin
60 surround encircle-2-kooli
60 surround round-1-dhidhi round-1-dhidhi
60 surround round-3-tintan round-3-tintan
60 surround round-4-kanka
61 be left end-2-bo
61 be left leave (go away)-1-dho leave (go away)-1-dho leave (go away)-1-dho
61 be left remain-to remain-to
62 respond go out (to)-bo
62 respond round-1-dhidhi
62 respond transform-ponden
62 respond agree-3-son
62 respond answer-1-jabi
62 respond go out (to)-bo
62 respond transform-ponden transform-ponden
63 open go out (to)-bo go out (to)-bo go out (to)-bo
63 open open-1-polo
63 open open-2-yele
63 open stand (to)-1-d’o
64 be different fall (to)-4-to
64 be different go out (to)-bo
64 be different knock down-kula
64 be different take-2-si
65 fall behind end-2-bo
65 fall behind leave (go away)-1-dho leave (go away)-1-dho
65 fall behind remain-to remain-to
66 plow do-1-ke do-1-ke
66 plow farm-sene
66 plow go out (to)-bo
66 plow plant (to)-1-sing
66 plow put-2-kpa
67 smell stand (to)-1-d’o
68 traverse, cross cut-1 (to)-kan cut-1 (to)-kan
68 traverse, cross cut-3 (to)-kini
68 traverse, cross cut-4 (to)-tige
68 traverse, cross cut-8 (to)-tebe cut-8 (to)-tebe cut-8 (to)-tebe
68 traverse, cross spread (to)-2-kpo
69 sing fall (to)-4-to fall (to)-4-to fall (to)-4-to fall (to)-4-to fall (to)-4-to
69 sing lie down-1-d’a
69 sing send-2-bo
70 write mark-ponyang
70 write do-1-ke
70 write kill-2-je kill-2-je
70 write mark-ponyang mark-ponyang mark-ponyang
70 write write-safe
71 drink drink (to)-1-min drink (to)-1-min drink (to)-1-min drink (to)-1-min
71 drink drink (to)-2-kpole drink (to)-2-kpole drink (to)-2-kpole
72 melt melt-2-senge
72 melt melt-1-yeelen
72 melt melt-2-senge melt-2-senge melt-2-senge
72 melt pour-2 (to)-pu
73 approach able, be-1-mo
73 approach catch-4-song catch-4-song
73 approach go (to)-2-dho
73 approach meet-ben
74 leave go away (to)-go
74 leave go away (to)-go
74 leave go out (to)-bo
74 leave rise (to)-2-wili
74 leave take-2-si
74 leave take-5-sige take-5-sige take-5-sige
75 75) cover gather-1-dhen
75 75) cover bend-2-bugun
75 75) cover close-1 (to), shut (to)-tugu
75 75) cover close-2 (to), shut (to)-tan
75 75) cover close-3 (to), shut (to)-kporu
75 75) cover fructify-ba
75 75) cover lie down-1-d’a
75 75) cover lie down-4-kpaa lie down-4-kpaa
76 76) remember come-3-nu
76 76) remember descend-1-jigi
76 76) remember lie down-1-d’a
76 76) remember stand (to)-1-d’o
76 76) remember take-2-si
76 76) remember wake up-2-buo wake up-2-buo
76 76) remember walk-1-sigan walk-1-sigan
77 77) help go out (to)-bo
77 77) help throw-2 (to)-zu
77 77) help catch-3-kun
77 77) help go out (to)-bo
77 77) help help-1-deemaa
77 77) help help-2-kpong help-2-kpong help-2-kpong
77 77) help pass-3-tanbi
78 78) understand hear (to)-moli hear (to)-moli hear (to)-moli hear (to)-moli hear (to)-moli hear (to)-moli
78 78) understand understand-fa’amu
79 79) fall into descend-1-jigi descend-1-jigi
79 79) fall into fall (to)-4-to fall (to)-4-to fall (to)-4-to
79 79) fall into lie down-1-d’a lie down-1-d’a
80 80) cut with wound-3-gii
80 80) cut with cut-1 (to)-kan cut-1 (to)-kan
80 80) cut with cut-4 (to)-tige
80 80) cut with cut-5 (to)-sege
80 80) cut with cut-8 (to)-tebe cut-8 (to)-tebe
80 80) cut with wound-4-tugo
81 81) stick to smth fasten-nodo fasten-nodo
81 81) stick to smth stuck, get (to)-kpatang stuck, get (to)-kpatang stuck, get (to)-kpatang
82 82) lose to smb prepare (to)-baa
83 83) be happy about, se rejouir de catch-3-kun
83 83) be happy about, se rejouir de do-1-ke
83 83) be happy about, se rejouir de good-2
83 83) be happy about, se rejouir de good-2-di
83 83) be happy about, se rejouir de tasty-nene tasty-nene tasty-nene
84 84) speak with do-1-ke
84 84) speak with fall (to)-4-to
84 84) speak with fall (to)-4-to
84 84) speak with go out (to)-bo go out (to)-bo
84 84) speak with kill-2-je
84 84) speak with speak-3-we
84 84) speak with stand (to)-1-d’o
84 84) speak with talk-1-bado
85 85) give birth to bear, give birth-1-banki
85 85) give birth to save-2-dha
85 85) give birth to bear, give birth-3-yaa bear, give birth-3-yaa
85 85) give birth to bear, give birth-5
85 85) give birth to descend-1-jigi descend-1-jigi
85 85) give birth to receive-sodon
85 85) give birth to see (to)-3-ka
85 85) give birth to spread (to)-2-kpo
86 86) let fall smth fall (to)-1-bele
86 86) let fall smth fall (to)-2-dia
86 86) let fall smth fall (to)-4-to fall (to)-4-to fall (to)-4-to
86 86) let fall smth go out (to)-bo
86 86) let fall smth lie down-1-d’a
87 87) diriger, gouverner take-2-si
87 87) diriger, gouverner go out (to)-bo
87 87) diriger, gouverner say-5-gee
88 88) miss catch-3-kun
88 88) miss catch-4-song catch-4-song
89 89) follow go away (to)-go
89 89) follow pass-2-gile
89 89) follow put-1-bila put-1-bila
90 90) climb down put-1-bila
90 90) climb down descend-1-jigi descend-1-jigi descend-1-jigi
90 90) climb down descend-2-yolo descend-2-yolo
90 90) climb down knock down-kula
90 90) climb down put-1-bila
91 91) listen to smth hear (to)-moli hear (to)-moli hear (to)-moli
91 91) listen to smth remain-to
91 91) listen to smth stand (to)-1-d’o stand (to)-1-d’o stand (to)-1-d’o stand (to)-1-d’o stand (to)-1-d’o
92 92) listen to smb go out (to)-bo
92 92) listen to smb obey-kolo obey-kolo
92 92) listen to smb sit (to)-2-yaga
92 92) listen to smb spread (to)-2-kpo
93 93) hear hear (to)-moli hear (to)-moli hear (to)-moli hear (to)-moli hear (to)-moli hear (to)-moli hear (to)-moli
93 93) hear stand (to)-1-d’o
94 94) mix up with smth mix-4-nyaxami
94 94) mix up with smth mix-6-sunpu mix-6-sunpu
95 95) look at end-3-nyia
95 95) look at look-1 (to)-felen
95 95) look at look-2 (to)-ga look-2 (to)-ga
95 95) look at look-4 (to)-kpele
95 95) look at search-3-gini
95 95) look at see (to)-3-ka see (to)-3-ka
96 96) take off go out (to)-bo go out (to)-bo go out (to)-bo go out (to)-bo
96 96) take off knock down-kula knock down-kula
97 97) appear in a dream dream-1-k’uyi
97 97) appear in a dream go out (to)-bo go out (to)-bo go out (to)-bo
97 97) appear in a dream kill-2-je
98 98) agree lie down-1-d’a
98 98) agree 0
98 98) agree agree-3-son
98 98) agree lie down-1-d’a
98 98) agree speak-3-we
98 98) agree stand (to)-1-d’o
98 98) agree tasty-nene
99 99) dispute do-1-ke
99 99) dispute do-1-ke do-1-ke
99 99) dispute noise-1-sonxo
99 99) dispute stand (to)-1-d’o
99 99) dispute war-1-kele
101 101) shoot at destroy (to)-1-te
101 101) shoot at lie down-1-d’a lie down-1-d’a
101 101) shoot at shoot, sting (to)-bon
101 101) shoot at stand (to)-1-to stand (to)-1-to
101 101) shoot at throw-1 (to)-fili
102 102) pour (dry product) do-1-ke do-1-ke
102 102) pour (dry product) go out (to)-bo
102 102) pour (dry product) pour-2 (to)-pu pour-2 (to)-pu
103 103) lose loose (to)-1-sama
103 103) lose drop-1 (to)-budun
103 103) lose fall (to)-2-dia
103 103) lose fall (to)-4-to
103 103) lose forget-1-nyina
103 103) lose loose (to)-2-leenu loose (to)-2-leenu
103 103) lose throw-1 (to)-fili
104 104) drown remain-to
104 104) drown die-1-kha
104 104) drown disappear-tunu
104 104) drown enter (to)-1-so
104 104) drown leave (go away)-1-dho leave (go away)-1-dho
104 104) drown remain-to
105 105) kill kill-1-faxa kill-1-faxa kill-1-faxa kill-1-faxa
105 105) kill kill-2-je kill-2-je kill-2-je
106 106) hit 0hit-kele 0hit-kele
106 106) hit beat-1-bugo
106 106) hit beat-2-dokpe
106 106) hit strike-2-ma strike-2-ma
107 107) kiss stand (to)-1-d’o
108 108) read read-2-lonong
108 108) read go out (to)-bo
108 108) read read-1-kara read-1-kara
108 108) read say-1-fo say-1-fo
108 108) read say-5-gee
109 109) move smth 0move-tumu 0move-tumu
109 109) move smth go out (to)-bo
109 109) move smth move (to)-3-lamaga
109 109) move smth noise-2-vin
109 109) move smth shake-1-miimii
110 110) respect do-1-ke
110 110) respect give (to)-3-fe
110 110) respect go out (to)-bo
110 110) respect heavy-1-bi heavy-1-bi
110 110) respect respect-3-bonya
110 110) respect sit (to)-2-yaga
111 111) disdain spoil-1-sile
111 111) disdain do-1-ke
111 111) disdain go out (to)-bo go out (to)-bo
111 111) disdain lie down-1-d’a
111 111) disdain loathe-nyigin
111 111) disdain see (to)-1-ye
112 112) be happy about fill (to)-1-pa
112 112) be happy about good-2-di
112 112) be happy about satisfy-wasa
112 112) be happy about tasty-nene tasty-nene tasty-nene
113 113) fall in love with lie down-1-d’a lie down-1-d’a
113 113) fall in love with love-xani
113 113) fall in love with passion-jarabi
113 113) fall in love with stand (to)-1-to stand (to)-1-to
114 114) trust in sb lie down-1-d’a lie down-1-d’a
115 115) have pity of catch-4-song
115 115) have pity of do-1-ke do-1-ke
115 115) have pity of pity-2-hina
116 116) be envious of cut-1 (to)-kan
116 116) be envious of do-1-ke
116 116) be envious of do-5-wo
116 116) be envious of fill (to)-1-pa
116 116) be envious of hate-xoni
116 116) be envious of swell-3-fuunu
117 117) be angry with bind-gidi
117 117) be angry with tie-1
117 117) be angry with bend-4-kula
117 117) be angry with bitter-2-xunan
117 117) be angry with boil-3-fili
117 117) be angry with catch-4-song
117 117) be angry with pain-1-dimi
117 117) be angry with pain-2-soli
117 117) be angry with take-2-si
118 118) be surprised about do-1-ke
118 118) be surprised about knock down-kula
118 118) be surprised about throw-1 (to)-fili
118 118) be surprised about throw-2 (to)-zu
119 119) love smth do-1-ke
119 119) love smth good-2-di
119 119) love smth please (to)-so
119 119) love smth tasty-nene
120 120) enjoy/ take pleasure in go out (to)-bo
120 120) enjoy/ take pleasure in knock down-kula
120 120) enjoy/ take pleasure in see (to)-1-ye
120 120) enjoy/ take pleasure in take-2-si
120 120) enjoy/ take pleasure in tasty-nene
121 121) want do-1-ke
122 122) be angry with = 117? boil-3-fili
122 122) be angry with = 117? catch-4-song
123 123) take offense by sb do-1-ke
123 123) take offense by sb go out (to)-bo
123 123) take offense by sb lie down-1-d’a
124 124) make sad sb chase (to)-kpe
124 124) make sad sb cut-8 (to)-tebe
124 124) make sad sb do-1-ke
124 124) make sad sb enter (to)-2-dzon
124 124) make sad sb kill-1-faxa
124 124) make sad sb knock down-kula
124 124) make sad sb wound-3-gii
125 125) be surprised by end-1-ban
125 125) be surprised by throw-1 (to)-fili
126 126) despise sb kill-2-je
126 126) despise sb better-fisa
126 126) despise sb do-1-ke
126 126) despise sb go out (to)-bo go out (to)-bo
126 126) despise sb stand (to)-1-d’o
127 127) be sad because of sb fill (to)-1-pa
127 127) be sad because of sb break (to)-1-gali
127 127) be sad because of sb cry, weep (to)-1-kasi
127 127) be sad because of sb cut-8 (to)-tebe
127 127) be sad because of sb stand (to)-1-d’o
127 127) be sad because of sb tasty-nene
128 128) be annoyed by sb boil-3-fili
128 128) be annoyed by sb catch-4-song
128 128) be annoyed by sb pain-1-dimi
129 129) sympathise to sb help-2-kpong
129 129) sympathise to sb go out (to)-bo go out (to)-bo
130 130) be embarrassed by sb cold-3-deli
130 130) be embarrassed by sb embarrass-2-kpala embarrass-2-kpala embarrass-2-kpala
130 130) be embarrassed by sb hinder-1-degun

Все примеры, без фильтрации

Code
# Load the merged wide table. Columns whose names end in "etym" hold the
# adposition etymologies (NOTE(review): presumably one such column per
# language -- confirm against the spreadsheet).
et <- readxl::read_xlsx("../GM_merged_wide_adpositions_MK_0926.xlsx")

# Dendrogram of the languages based on which etymologies they use,
# computed over all stimuli (no `to_keep` filtering).
et |>
  select(number, predicate_eng, stimuli, ends_with("etym")) |>
  # Long format: one row per stimulus x etymology column.
  pivot_longer(names_to = "source",
               values_to = "value",
               ends_with("etym")) |>
  na.omit() |>
  distinct() |>
  # A cell may list several etymologies separated by ";": one row per entry.
  mutate(value = str_split(value, ";")) |>
  unnest_longer(value) |>
  # Normalise whitespace left around the separator so that identical
  # etymologies are not treated as different values.
  mutate(value = str_squish(value)) |>
  distinct() |>
  # Drop "0" entries (apparently a "no etymology" placeholder) only after
  # splitting, so a "0" listed inside a multi-entry cell is removed as well.
  filter(value != "0") |>
  mutate(value2 = 1,
         source = str_replace(source, " ", "_")) |>
  # Wide 0/1 table: rows are stimulus/etymology pairs, columns are the
  # etymology columns; 1 marks that the etymology occurs in that column.
  pivot_wider(names_from = source, values_from = value2, values_fill = 0) |>
  select(ends_with("etym")) |>
  rename(Guro = Guro_etym,
         Looma = Looma_etym,
         Mano = Mano_etym,
         Kono = Kono_etym,
         Dan_Gweetaa = Dan_etym,
         Bamana = Bamana_etym,
         Kpelle = Kpelle_etym) |>
  # Transpose so that languages become the rows compared by dist().
  t() |>
  dist(method = "binary") |>
  hclust() |>
  # magrittr pipe is needed here for the `.` placeholder used in tip.color.
  as.phylo() %>%
  plot(main = "Adposition etymology (all stimuli)",
       tip.color = colors$color[match(.$tip.label, colors$language)],
       direction = "downwards",
       cex = 1.5,
       font = 2)

Только стимулы, для которых есть все языки

Code
# Dendrogram of the languages based on which etymologies they use,
# restricted to the stimuli listed in `to_keep`. No random sampling is
# done in this chunk, so the plot title must not mention it.
et |>
  select(number, predicate_eng, stimuli, ends_with("etym")) |>
  # Long format: one row per stimulus x etymology column.
  pivot_longer(names_to = "source",
               values_to = "value",
               ends_with("etym")) |>
  na.omit() |>
  filter(number %in% to_keep) |>
  distinct() |>
  # A cell may list several etymologies separated by ";": one row per entry.
  mutate(value = str_split(value, ";")) |>
  unnest_longer(value) |>
  # Normalise whitespace left around the separator so that identical
  # etymologies are not treated as different values (and " 0" is caught
  # by the filter below).
  mutate(value = str_squish(value)) |>
  filter(value != "0") |>
  distinct() |>
  mutate(value2 = 1,
         source = str_replace(source, " ", "_")) |>
  # Wide 0/1 table: 1 marks that the etymology occurs in that column.
  pivot_wider(names_from = source, values_from = value2, values_fill = 0) |>
  select(ends_with("etym")) |>
  rename(Guro = Guro_etym,
         Looma = Looma_etym,
         Mano = Mano_etym,
         Kono = Kono_etym,
         Dan_Gweetaa = Dan_etym,
         Bamana = Bamana_etym,
         Kpelle = Kpelle_etym) |>
  # Transpose so that languages become the rows compared by dist().
  t() |>
  dist(method = "binary") |>
  hclust() |>
  # magrittr pipe is needed here for the `.` placeholder used in tip.color.
  as.phylo() %>%
  plot(main = "Adposition etymology (common stimuli)",
       tip.color = colors$color[match(.$tip.label, colors$language)],
       direction = "downwards",
       cex = 1.5,
       font = 2)

Только стимулы, для которых есть все языки, случайная этимология для каждого языка

Code
# Language dendrogram from shared adposition etymologies, restricted to the
# stimuli in `to_keep`, keeping one randomly drawn row per stimulus number.
# The draw is unseeded, so each run may yield a different tree.
# NOTE(review): the sampling unit is the stimulus (one row of the wide table),
# not the language -- confirm this matches the intent of the section heading.
et |>
  select(number, predicate_eng, stimuli, ends_with("etym")) |>
  pivot_longer(cols = ends_with("etym"),
               names_to = "source",
               values_to = "value") |>
  na.omit() |>
  filter(number %in% to_keep) |>
  distinct() |>
  # One row per ";"-separated etymology, with whitespace normalised.
  mutate(value = str_split(value, ";")) |>
  unnest_longer(value) |>
  mutate(value = str_squish(value)) |>
  distinct() |>
  filter(value != "0") |>
  # Presence flag that becomes the 0/1 cell of each etymology column.
  mutate(present = 1,
         source = str_replace(source, " ", "_")) |>
  pivot_wider(names_from = source, values_from = present, values_fill = 0) |>
  group_by(number) |>
  slice_sample(n = 1) |>
  ungroup() |>
  select(ends_with("etym")) |>
  rename(Bamana = Bamana_etym,
         Dan_Gweetaa = Dan_etym,
         Guro = Guro_etym,
         Kono = Kono_etym,
         Kpelle = Kpelle_etym,
         Looma = Looma_etym,
         Mano = Mano_etym) |>
  # Languages become rows, then binary distance -> hierarchical clustering.
  t() |>
  dist(method = "binary") |>
  hclust() |>
  as.phylo() |>
  (\(tree) plot(tree,
                main = "Adposition etymology with random equivalents and common stimuli",
                tip.color = colors$color[match(tree$tip.label, colors$language)],
                direction = "downwards",
                cex = 1.5,
                font = 2))()

Code
# Language dendrogram from shared adposition etymologies, restricted to the
# stimuli in `to_keep`, keeping one randomly drawn row per stimulus number.
# The draw is unseeded, so each run may yield a different tree.
# NOTE(review): the sampling unit is the stimulus (one row of the wide table),
# not the language -- confirm this is the intended sampling unit.
et |>
  select(number, predicate_eng, stimuli, ends_with("etym")) |>
  pivot_longer(cols = ends_with("etym"),
               names_to = "source",
               values_to = "value") |>
  na.omit() |>
  filter(number %in% to_keep) |>
  distinct() |>
  # One row per ";"-separated etymology, with whitespace normalised.
  mutate(value = str_split(value, ";")) |>
  unnest_longer(value) |>
  mutate(value = str_squish(value)) |>
  distinct() |>
  filter(value != "0") |>
  # Presence flag that becomes the 0/1 cell of each etymology column.
  mutate(present = 1,
         source = str_replace(source, " ", "_")) |>
  pivot_wider(names_from = source, values_from = present, values_fill = 0) |>
  group_by(number) |>
  slice_sample(n = 1) |>
  ungroup() |>
  select(ends_with("etym")) |>
  rename(Bamana = Bamana_etym,
         Dan_Gweetaa = Dan_etym,
         Guro = Guro_etym,
         Kono = Kono_etym,
         Kpelle = Kpelle_etym,
         Looma = Looma_etym,
         Mano = Mano_etym) |>
  # Languages become rows, then binary distance -> hierarchical clustering.
  t() |>
  dist(method = "binary") |>
  hclust() |>
  as.phylo() |>
  (\(tree) plot(tree,
                main = "Adposition etymology with random equivalents and common stimuli",
                tip.color = colors$color[match(tree$tip.label, colors$language)],
                direction = "downwards",
                cex = 1.5,
                font = 2))()

Code
# Language dendrogram from shared adposition etymologies, restricted to the
# stimuli in `to_keep`, keeping one randomly drawn row per stimulus number.
# The draw is unseeded, so each run may yield a different tree.
# NOTE(review): the sampling unit is the stimulus (one row of the wide table),
# not the language -- confirm this is the intended sampling unit.
et |>
  select(number, predicate_eng, stimuli, ends_with("etym")) |>
  pivot_longer(cols = ends_with("etym"),
               names_to = "source",
               values_to = "value") |>
  na.omit() |>
  filter(number %in% to_keep) |>
  distinct() |>
  # One row per ";"-separated etymology, with whitespace normalised.
  mutate(value = str_split(value, ";")) |>
  unnest_longer(value) |>
  mutate(value = str_squish(value)) |>
  distinct() |>
  filter(value != "0") |>
  # Presence flag that becomes the 0/1 cell of each etymology column.
  mutate(present = 1,
         source = str_replace(source, " ", "_")) |>
  pivot_wider(names_from = source, values_from = present, values_fill = 0) |>
  group_by(number) |>
  slice_sample(n = 1) |>
  ungroup() |>
  select(ends_with("etym")) |>
  rename(Bamana = Bamana_etym,
         Dan_Gweetaa = Dan_etym,
         Guro = Guro_etym,
         Kono = Kono_etym,
         Kpelle = Kpelle_etym,
         Looma = Looma_etym,
         Mano = Mano_etym) |>
  # Languages become rows, then binary distance -> hierarchical clustering.
  t() |>
  dist(method = "binary") |>
  hclust() |>
  as.phylo() |>
  (\(tree) plot(tree,
                main = "Adposition etymology with random equivalents and common stimuli",
                tip.color = colors$color[match(tree$tip.label, colors$language)],
                direction = "downwards",
                cex = 1.5,
                font = 2))()

Code
# Language dendrogram from shared adposition etymologies, restricted to the
# stimuli in `to_keep`, keeping one randomly drawn row per stimulus number.
# The draw is unseeded, so each run may yield a different tree.
# NOTE(review): the sampling unit is the stimulus (one row of the wide table),
# not the language -- confirm this is the intended sampling unit.
et |>
  select(number, predicate_eng, stimuli, ends_with("etym")) |>
  pivot_longer(cols = ends_with("etym"),
               names_to = "source",
               values_to = "value") |>
  na.omit() |>
  filter(number %in% to_keep) |>
  distinct() |>
  # One row per ";"-separated etymology, with whitespace normalised.
  mutate(value = str_split(value, ";")) |>
  unnest_longer(value) |>
  mutate(value = str_squish(value)) |>
  distinct() |>
  filter(value != "0") |>
  # Presence flag that becomes the 0/1 cell of each etymology column.
  mutate(present = 1,
         source = str_replace(source, " ", "_")) |>
  pivot_wider(names_from = source, values_from = present, values_fill = 0) |>
  group_by(number) |>
  slice_sample(n = 1) |>
  ungroup() |>
  select(ends_with("etym")) |>
  rename(Bamana = Bamana_etym,
         Dan_Gweetaa = Dan_etym,
         Guro = Guro_etym,
         Kono = Kono_etym,
         Kpelle = Kpelle_etym,
         Looma = Looma_etym,
         Mano = Mano_etym) |>
  # Languages become rows, then binary distance -> hierarchical clustering.
  t() |>
  dist(method = "binary") |>
  hclust() |>
  as.phylo() |>
  (\(tree) plot(tree,
                main = "Adposition etymology with random equivalents and common stimuli",
                tip.color = colors$color[match(tree$tip.label, colors$language)],
                direction = "downwards",
                cex = 1.5,
                font = 2))()

Code
# Language dendrogram from shared adposition etymologies, restricted to the
# stimuli in `to_keep`, keeping one randomly drawn row per stimulus number.
# The draw is unseeded, so each run may yield a different tree.
# NOTE(review): the sampling unit is the stimulus (one row of the wide table),
# not the language -- confirm this is the intended sampling unit.
et |>
  select(number, predicate_eng, stimuli, ends_with("etym")) |>
  pivot_longer(cols = ends_with("etym"),
               names_to = "source",
               values_to = "value") |>
  na.omit() |>
  filter(number %in% to_keep) |>
  distinct() |>
  # One row per ";"-separated etymology, with whitespace normalised.
  mutate(value = str_split(value, ";")) |>
  unnest_longer(value) |>
  mutate(value = str_squish(value)) |>
  distinct() |>
  filter(value != "0") |>
  # Presence flag that becomes the 0/1 cell of each etymology column.
  mutate(present = 1,
         source = str_replace(source, " ", "_")) |>
  pivot_wider(names_from = source, values_from = present, values_fill = 0) |>
  group_by(number) |>
  slice_sample(n = 1) |>
  ungroup() |>
  select(ends_with("etym")) |>
  rename(Bamana = Bamana_etym,
         Dan_Gweetaa = Dan_etym,
         Guro = Guro_etym,
         Kono = Kono_etym,
         Kpelle = Kpelle_etym,
         Looma = Looma_etym,
         Mano = Mano_etym) |>
  # Languages become rows, then binary distance -> hierarchical clustering.
  t() |>
  dist(method = "binary") |>
  hclust() |>
  as.phylo() |>
  (\(tree) plot(tree,
                main = "Adposition etymology with random equivalents and common stimuli",
                tip.color = colors$color[match(tree$tip.label, colors$language)],
                direction = "downwards",
                cex = 1.5,
                font = 2))()

Code
# Language dendrogram from shared adposition etymologies, restricted to the
# stimuli in `to_keep`, keeping one randomly drawn row per stimulus number.
# The draw is unseeded, so each run may yield a different tree.
# NOTE(review): the sampling unit is the stimulus (one row of the wide table),
# not the language -- confirm this is the intended sampling unit.
et |>
  select(number, predicate_eng, stimuli, ends_with("etym")) |>
  pivot_longer(cols = ends_with("etym"),
               names_to = "source",
               values_to = "value") |>
  na.omit() |>
  filter(number %in% to_keep) |>
  distinct() |>
  # One row per ";"-separated etymology, with whitespace normalised.
  mutate(value = str_split(value, ";")) |>
  unnest_longer(value) |>
  mutate(value = str_squish(value)) |>
  distinct() |>
  filter(value != "0") |>
  # Presence flag that becomes the 0/1 cell of each etymology column.
  mutate(present = 1,
         source = str_replace(source, " ", "_")) |>
  pivot_wider(names_from = source, values_from = present, values_fill = 0) |>
  group_by(number) |>
  slice_sample(n = 1) |>
  ungroup() |>
  select(ends_with("etym")) |>
  rename(Bamana = Bamana_etym,
         Dan_Gweetaa = Dan_etym,
         Guro = Guro_etym,
         Kono = Kono_etym,
         Kpelle = Kpelle_etym,
         Looma = Looma_etym,
         Mano = Mano_etym) |>
  # Languages become rows, then binary distance -> hierarchical clustering.
  t() |>
  dist(method = "binary") |>
  hclust() |>
  as.phylo() |>
  (\(tree) plot(tree,
                main = "Adposition etymology with random equivalents and common stimuli",
                tip.color = colors$color[match(tree$tip.label, colors$language)],
                direction = "downwards",
                cex = 1.5,
                font = 2))()

Code
# Language dendrogram from shared adposition etymologies, restricted to the
# stimuli in `to_keep`, keeping one randomly drawn row per stimulus number.
# The draw is unseeded, so each run may yield a different tree.
# NOTE(review): the sampling unit is the stimulus (one row of the wide table),
# not the language -- confirm this is the intended sampling unit.
et |>
  select(number, predicate_eng, stimuli, ends_with("etym")) |>
  pivot_longer(cols = ends_with("etym"),
               names_to = "source",
               values_to = "value") |>
  na.omit() |>
  filter(number %in% to_keep) |>
  distinct() |>
  # One row per ";"-separated etymology, with whitespace normalised.
  mutate(value = str_split(value, ";")) |>
  unnest_longer(value) |>
  mutate(value = str_squish(value)) |>
  distinct() |>
  filter(value != "0") |>
  # Presence flag that becomes the 0/1 cell of each etymology column.
  mutate(present = 1,
         source = str_replace(source, " ", "_")) |>
  pivot_wider(names_from = source, values_from = present, values_fill = 0) |>
  group_by(number) |>
  slice_sample(n = 1) |>
  ungroup() |>
  select(ends_with("etym")) |>
  rename(Bamana = Bamana_etym,
         Dan_Gweetaa = Dan_etym,
         Guro = Guro_etym,
         Kono = Kono_etym,
         Kpelle = Kpelle_etym,
         Looma = Looma_etym,
         Mano = Mano_etym) |>
  # Languages become rows, then binary distance -> hierarchical clustering.
  t() |>
  dist(method = "binary") |>
  hclust() |>
  as.phylo() |>
  (\(tree) plot(tree,
                main = "Adposition etymology with random equivalents and common stimuli",
                tip.color = colors$color[match(tree$tip.label, colors$language)],
                direction = "downwards",
                cex = 1.5,
                font = 2))()

Code
# Language dendrogram from shared adposition etymologies, restricted to the
# stimuli in `to_keep`, keeping one randomly drawn row per stimulus number.
# The draw is unseeded, so each run may yield a different tree.
# NOTE(review): the sampling unit is the stimulus (one row of the wide table),
# not the language -- confirm this is the intended sampling unit.
et |>
  select(number, predicate_eng, stimuli, ends_with("etym")) |>
  pivot_longer(cols = ends_with("etym"),
               names_to = "source",
               values_to = "value") |>
  na.omit() |>
  filter(number %in% to_keep) |>
  distinct() |>
  # One row per ";"-separated etymology, with whitespace normalised.
  mutate(value = str_split(value, ";")) |>
  unnest_longer(value) |>
  mutate(value = str_squish(value)) |>
  distinct() |>
  filter(value != "0") |>
  # Presence flag that becomes the 0/1 cell of each etymology column.
  mutate(present = 1,
         source = str_replace(source, " ", "_")) |>
  pivot_wider(names_from = source, values_from = present, values_fill = 0) |>
  group_by(number) |>
  slice_sample(n = 1) |>
  ungroup() |>
  select(ends_with("etym")) |>
  rename(Bamana = Bamana_etym,
         Dan_Gweetaa = Dan_etym,
         Guro = Guro_etym,
         Kono = Kono_etym,
         Kpelle = Kpelle_etym,
         Looma = Looma_etym,
         Mano = Mano_etym) |>
  # Languages become rows, then binary distance -> hierarchical clustering.
  t() |>
  dist(method = "binary") |>
  hclust() |>
  as.phylo() |>
  (\(tree) plot(tree,
                main = "Adposition etymology with random equivalents and common stimuli",
                tip.color = colors$color[match(tree$tip.label, colors$language)],
                direction = "downwards",
                cex = 1.5,
                font = 2))()

Code
# Language dendrogram from shared adposition etymologies, restricted to the
# stimuli in `to_keep`, keeping one randomly drawn row per stimulus number.
# The draw is unseeded, so each run may yield a different tree.
# NOTE(review): the sampling unit is the stimulus (one row of the wide table),
# not the language -- confirm this is the intended sampling unit.
et |>
  select(number, predicate_eng, stimuli, ends_with("etym")) |>
  pivot_longer(cols = ends_with("etym"),
               names_to = "source",
               values_to = "value") |>
  na.omit() |>
  filter(number %in% to_keep) |>
  distinct() |>
  # One row per ";"-separated etymology, with whitespace normalised.
  mutate(value = str_split(value, ";")) |>
  unnest_longer(value) |>
  mutate(value = str_squish(value)) |>
  distinct() |>
  filter(value != "0") |>
  # Presence flag that becomes the 0/1 cell of each etymology column.
  mutate(present = 1,
         source = str_replace(source, " ", "_")) |>
  pivot_wider(names_from = source, values_from = present, values_fill = 0) |>
  group_by(number) |>
  slice_sample(n = 1) |>
  ungroup() |>
  select(ends_with("etym")) |>
  rename(Bamana = Bamana_etym,
         Dan_Gweetaa = Dan_etym,
         Guro = Guro_etym,
         Kono = Kono_etym,
         Kpelle = Kpelle_etym,
         Looma = Looma_etym,
         Mano = Mano_etym) |>
  # Languages become rows, then binary distance -> hierarchical clustering.
  t() |>
  dist(method = "binary") |>
  hclust() |>
  as.phylo() |>
  (\(tree) plot(tree,
                main = "Adposition etymology with random equivalents and common stimuli",
                tip.color = colors$color[match(tree$tip.label, colors$language)],
                direction = "downwards",
                cex = 1.5,
                font = 2))()

Code
# Language dendrogram from shared adposition etymologies, restricted to the
# stimuli in `to_keep`, keeping one randomly drawn row per stimulus number.
# The draw is unseeded, so each run may yield a different tree.
# NOTE(review): the sampling unit is the stimulus (one row of the wide table),
# not the language -- confirm this is the intended sampling unit.
et |>
  select(number, predicate_eng, stimuli, ends_with("etym")) |>
  pivot_longer(cols = ends_with("etym"),
               names_to = "source",
               values_to = "value") |>
  na.omit() |>
  filter(number %in% to_keep) |>
  distinct() |>
  # One row per ";"-separated etymology, with whitespace normalised.
  mutate(value = str_split(value, ";")) |>
  unnest_longer(value) |>
  mutate(value = str_squish(value)) |>
  distinct() |>
  filter(value != "0") |>
  # Presence flag that becomes the 0/1 cell of each etymology column.
  mutate(present = 1,
         source = str_replace(source, " ", "_")) |>
  pivot_wider(names_from = source, values_from = present, values_fill = 0) |>
  group_by(number) |>
  slice_sample(n = 1) |>
  ungroup() |>
  select(ends_with("etym")) |>
  rename(Bamana = Bamana_etym,
         Dan_Gweetaa = Dan_etym,
         Guro = Guro_etym,
         Kono = Kono_etym,
         Kpelle = Kpelle_etym,
         Looma = Looma_etym,
         Mano = Mano_etym) |>
  # Languages become rows, then binary distance -> hierarchical clustering.
  t() |>
  dist(method = "binary") |>
  hclust() |>
  as.phylo() |>
  (\(tree) plot(tree,
                main = "Adposition etymology with random equivalents and common stimuli",
                tip.color = colors$color[match(tree$tip.label, colors$language)],
                direction = "downwards",
                cex = 1.5,
                font = 2))()

Code
# Language dendrogram from shared adposition etymologies, restricted to the
# stimuli in `to_keep`, keeping one randomly drawn row per stimulus number.
# The draw is unseeded, so each run may yield a different tree.
# NOTE(review): the sampling unit is the stimulus (one row of the wide table),
# not the language -- confirm this is the intended sampling unit.
et |>
  select(number, predicate_eng, stimuli, ends_with("etym")) |>
  pivot_longer(cols = ends_with("etym"),
               names_to = "source",
               values_to = "value") |>
  na.omit() |>
  filter(number %in% to_keep) |>
  distinct() |>
  # One row per ";"-separated etymology, with whitespace normalised.
  mutate(value = str_split(value, ";")) |>
  unnest_longer(value) |>
  mutate(value = str_squish(value)) |>
  distinct() |>
  filter(value != "0") |>
  # Presence flag that becomes the 0/1 cell of each etymology column.
  mutate(present = 1,
         source = str_replace(source, " ", "_")) |>
  pivot_wider(names_from = source, values_from = present, values_fill = 0) |>
  group_by(number) |>
  slice_sample(n = 1) |>
  ungroup() |>
  select(ends_with("etym")) |>
  rename(Bamana = Bamana_etym,
         Dan_Gweetaa = Dan_etym,
         Guro = Guro_etym,
         Kono = Kono_etym,
         Kpelle = Kpelle_etym,
         Looma = Looma_etym,
         Mano = Mano_etym) |>
  # Languages become rows, then binary distance -> hierarchical clustering.
  t() |>
  dist(method = "binary") |>
  hclust() |>
  as.phylo() |>
  (\(tree) plot(tree,
                main = "Adposition etymology with random equivalents and common stimuli",
                tip.color = colors$color[match(tree$tip.label, colors$language)],
                direction = "downwards",
                cex = 1.5,
                font = 2))()

Code
# Language dendrogram from shared adposition etymologies, restricted to the
# stimuli in `to_keep`, keeping one randomly drawn row per stimulus number.
# The draw is unseeded, so each run may yield a different tree.
# NOTE(review): the sampling unit is the stimulus (one row of the wide table),
# not the language -- confirm this is the intended sampling unit.
et |>
  select(number, predicate_eng, stimuli, ends_with("etym")) |>
  pivot_longer(cols = ends_with("etym"),
               names_to = "source",
               values_to = "value") |>
  na.omit() |>
  filter(number %in% to_keep) |>
  distinct() |>
  # One row per ";"-separated etymology, with whitespace normalised.
  mutate(value = str_split(value, ";")) |>
  unnest_longer(value) |>
  mutate(value = str_squish(value)) |>
  distinct() |>
  filter(value != "0") |>
  # Presence flag that becomes the 0/1 cell of each etymology column.
  mutate(present = 1,
         source = str_replace(source, " ", "_")) |>
  pivot_wider(names_from = source, values_from = present, values_fill = 0) |>
  group_by(number) |>
  slice_sample(n = 1) |>
  ungroup() |>
  select(ends_with("etym")) |>
  rename(Bamana = Bamana_etym,
         Dan_Gweetaa = Dan_etym,
         Guro = Guro_etym,
         Kono = Kono_etym,
         Kpelle = Kpelle_etym,
         Looma = Looma_etym,
         Mano = Mano_etym) |>
  # Languages become rows, then binary distance -> hierarchical clustering.
  t() |>
  dist(method = "binary") |>
  hclust() |>
  as.phylo() |>
  (\(tree) plot(tree,
                main = "Adposition etymology with random equivalents and common stimuli",
                tip.color = colors$color[match(tree$tip.label, colors$language)],
                direction = "downwards",
                cex = 1.5,
                font = 2))()

Code
# Language dendrogram from shared adposition etymologies, restricted to the
# stimuli in `to_keep`, keeping one randomly drawn row per stimulus number.
# The draw is unseeded, so each run may yield a different tree.
# NOTE(review): the sampling unit is the stimulus (one row of the wide table),
# not the language -- confirm this is the intended sampling unit.
et |>
  select(number, predicate_eng, stimuli, ends_with("etym")) |>
  pivot_longer(cols = ends_with("etym"),
               names_to = "source",
               values_to = "value") |>
  na.omit() |>
  filter(number %in% to_keep) |>
  distinct() |>
  # One row per ";"-separated etymology, with whitespace normalised.
  mutate(value = str_split(value, ";")) |>
  unnest_longer(value) |>
  mutate(value = str_squish(value)) |>
  distinct() |>
  filter(value != "0") |>
  # Presence flag that becomes the 0/1 cell of each etymology column.
  mutate(present = 1,
         source = str_replace(source, " ", "_")) |>
  pivot_wider(names_from = source, values_from = present, values_fill = 0) |>
  group_by(number) |>
  slice_sample(n = 1) |>
  ungroup() |>
  select(ends_with("etym")) |>
  rename(Bamana = Bamana_etym,
         Dan_Gweetaa = Dan_etym,
         Guro = Guro_etym,
         Kono = Kono_etym,
         Kpelle = Kpelle_etym,
         Looma = Looma_etym,
         Mano = Mano_etym) |>
  # Languages become rows, then binary distance -> hierarchical clustering.
  t() |>
  dist(method = "binary") |>
  hclust() |>
  as.phylo() |>
  (\(tree) plot(tree,
                main = "Adposition etymology with random equivalents and common stimuli",
                tip.color = colors$color[match(tree$tip.label, colors$language)],
                direction = "downwards",
                cex = 1.5,
                font = 2))()

Code
# Another run of the common-stimuli dendrogram: languages clustered by
# adposition etymology, stimuli limited to `to_keep`, one random row per stimulus.
# No seed is set in this chunk, so the drawn rows can differ between runs.
et |> 
  filter(number %in% to_keep) |> 
  select(number, predicate_eng, stimuli, ends_with("etym")) |> 
  pivot_longer(cols = ends_with("etym"),
               names_to = "source",
               values_to = "value") |> 
  na.omit() |> 
  distinct() |> 
  # One row per ";"-separated alternative, with surrounding spaces removed.
  separate_longer_delim(value, delim = ";") |> 
  mutate(value = str_squish(value)) |> 
  filter(value != "0") |> 
  distinct() |> 
  # Turn "<Language> etym" into "<Language>_etym" and mark presence with 1.
  mutate(source = str_replace(source, " ", "_"),
         value2 = 1) |> 
  pivot_wider(names_from = source, values_from = value2, values_fill = 0) |> 
  # Keep a single random row for every stimulus number.
  group_by(number) |> 
  slice_sample(n = 1) |> 
  ungroup() |> 
  select(ends_with("etym")) |> 
  rename(all_of(c(Guro = "Guro_etym",
                  Looma = "Looma_etym",
                  Mano = "Mano_etym",
                  Kono = "Kono_etym",
                  Dan_Gweetaa = "Dan_etym",
                  Bamana = "Bamana_etym",
                  Kpelle = "Kpelle_etym"))) |> 
  # Languages become rows, so the binary distance is computed between languages.
  t() |> 
  dist(method = "binary") |> 
  hclust() |> 
  as.phylo() |> 
  (\(tree) plot(tree,
                main = "Adposition etymology with random equivalents and common stimuli",
                # Colour each tip by looking its label up in `colors`.
                tip.color = colors$color[match(tree$tip.label, colors$language)],
                direction = "downwards",
                cex = 1.5, 
                font = 2))()

Только стимулы, для которых есть заполненная этимология

К сожалению, таких строчек всего одна:

Code
# Show only the stimuli for which none of the selected columns is missing,
# i.e. every "etym" column (and the identifying columns) is filled in.
na.omit(
  select(et, number, predicate_eng, stimuli, ends_with("etym"))
)
number predicate_eng stimuli Guro etym Looma etym Mano etym Dan etym Kono etym Kpelle etym Bamana etym
3 be afraid of (P. has to go out of the house, but there is a dog barking in the yard). P. a peur du chien. palm of hand-degere postposition by-ba postposition-for-2-len; postposition with-3-ga postposition-for-1-gon postposition-on-1-ma postposition-on-1-ma eye-nya

Все этимологии:

Code
# Load the wide adposition table. The columns ending in "etym" hold the
# etymology labels; one cell may list several labels separated by ";".
et <- readxl::read_xlsx("../GM_merged_wide_adpositions_MK_0926.xlsx")

# Binary distances between languages over all stimuli. Every
# (stimulus, etymology label) combination is one 0/1 feature.
etym <- et |> 
  select(number, predicate_eng, stimuli, ends_with("etym")) |> 
  pivot_longer(cols = ends_with("etym"),
               names_to = "source", 
               values_to = "value") |> 
  na.omit() |> 
  distinct() |> 
  mutate(value = str_split(value, ";")) |> 
  unnest_longer(value) |> 
  # Trim the pieces: after splitting "a; b" the second piece is " b", which
  # would otherwise be a different feature from a plain "b".
  mutate(value = str_squish(value)) |> 
  distinct() |> 
  # Drop "0" only after splitting and trimming, so that a "0" listed inside a
  # multi-valued cell is removed as well.
  filter(value != "0") |> 
  # Turn "<Language> etym" into "<Language>_etym" and mark presence with 1.
  mutate(value2 = 1,
         source = str_replace(source, " ", "_")) |> 
  pivot_wider(names_from = source, values_from = value2, values_fill = 0) |> 
  select(ends_with("etym"))  |> 
  rename(Guro = Guro_etym,
         Looma = Looma_etym,
         Mano = Mano_etym,
         Kono = Kono_etym,
         Dan_Gweetaa = Dan_etym,
         Bamana = Bamana_etym,
         Kpelle = Kpelle_etym) |> 
  # Languages become rows, so the distance is computed between languages.
  t() |> 
  dist(method = "binary")

# Dendrogram built from the `etym` distances (hclust with its default settings).
etym |> 
  hclust() |> 
  as.phylo() |> 
  (\(tree) plot(tree,
                main = "Adposition etymology (all stimuli)",
                # Colour each tip by looking its label up in `colors`.
                tip.color = colors$color[match(tree$tip.label, colors$language)],
                direction = "downwards",
                cex = 1.5, 
                font = 2))()

Code
# NeighborNet network drawn from the same `etym` distances; the title is
# added to the finished plot in a separate call.
plot(neighborNet(etym))
title(main = "Adposition etymology (all stimuli)")

Кластеризация со случайным эквивалентом. Я запустил более 10 раз – ничего в структуре не поменялось, только длина ножек менялась.

Code
# Binary distances between languages when each language contributes a single
# randomly chosen etymology label per stimulus.
# No seed is set in this chunk, so the chosen labels can differ between runs.
etym_random <- et |> 
  select(number, predicate_eng, stimuli, ends_with("etym")) |> 
  pivot_longer(cols = ends_with("etym"),
               names_to = "source", 
               values_to = "value") |> 
  na.omit() |> 
  distinct() |> 
  mutate(value = str_split(value, ";")) |> 
  unnest_longer(value) |> 
  # Trim the pieces: after splitting "a; b" the second piece is " b", which
  # would otherwise be sampled and compared as a label different from "b".
  mutate(value = str_squish(value)) |> 
  distinct() |> 
  # Drop "0" only after splitting and trimming, so that a "0" listed inside a
  # multi-valued cell can never be the sampled label.
  filter(value != "0") |> 
  # One random label for every stimulus-language combination.
  group_by(number, source) |> 
  slice_sample(n = 1) |> 
  ungroup() |> 
  # Turn "<Language> etym" into "<Language>_etym" and mark presence with 1.
  mutate(value2 = 1,
         source = str_replace(source, " ", "_")) |>  
  pivot_wider(names_from = source, values_from = value2, values_fill = 0) |> 
  select(ends_with("etym")) |> 
  rename(Guro = Guro_etym,
         Looma = Looma_etym,
         Kono = Kono_etym,
         Mano = Mano_etym,
         Dan_Gweetaa = Dan_etym,
         Bamana = Bamana_etym,
         Kpelle = Kpelle_etym) |> 
  # Languages become rows, so the distance is computed between languages.
  t() |> 
  dist(method = "binary")

# Dendrogram built from the `etym_random` distances (hclust with its default settings).
etym_random |> 
  hclust() |> 
  as.phylo() |> 
  (\(tree) plot(tree,
                main = "Adposition etymology with random equivalents",
                # Colour each tip by looking its label up in `colors`.
                tip.color = colors$color[match(tree$tip.label, colors$language)],
                direction = "downwards",
                cex = 1.5, 
                font = 2))()

Code
# NeighborNet network drawn from the `etym_random` distances; the title is
# added to the finished plot in a separate call.
plot(neighborNet(etym_random))
title(main = "Adposition etymology with random equivalents")

Это сделано по следующей таблице:

Code
# Table of etymology labels: one row per stimulus and label, one column per
# language, the label repeated in the columns of the languages that have it.
et |> 
  select(number, predicate_eng, ends_with("etym")) |> 
  pivot_longer(cols = ends_with("etym"),
               names_to = "source", 
               values_to = "value") |> 
  na.omit() |> 
  distinct() |> 
  # Commas are treated as separators too. The listed phrases contain a comma
  # that must stay, so it is hidden behind "|||" while every other comma is
  # turned into ";" and restored afterwards.
  mutate(value = str_replace_all(value, "able, be", "able||| be"),
         value = str_replace_all(value, "bear, give", "bear||| give"),
         value = str_replace_all(value, ", shut", "||| shut"),
         value = str_replace_all(value, "cry, weep", "cry||| weep"),
         value = str_replace_all(value, "shoot, sting", "shoot||| sting"),
         value = str_replace_all(value, "stuck, get", "stuck||| get"),
         value = str_replace_all(value, ",", ";"),
         value = str_replace_all(value, "\\|\\|\\|", ","),
         value = str_split(value, ";")) |> 
  unnest_longer(value) |> 
  # Trim the pieces: after splitting "a; b" the second piece is " b", which
  # would otherwise get its own row instead of sharing the row of "b".
  mutate(value = str_squish(value)) |> 
  distinct() |> 
  # Drop "0" only after splitting and trimming, so that a "0" listed inside a
  # multi-valued cell does not show up as a row of its own.
  filter(value != "0") |> 
  mutate(value2 = value) |> 
  pivot_wider(names_from = source, values_from = value2) |>
  arrange(number, predicate_eng, value) |> 
  select(-value)
number predicate_eng Dan etym Guro etym Looma etym Mano etym Kono etym Kpelle etym Bamana etym
1 hurt postposition with-3-ga NA NA NA NA NA NA
1 hurt postposition-on-1-ma NA NA NA NA NA NA
2 be sick with NA NA NA NA NA NA postposition at-la
2 be sick with postposition-on-1-ma postposition-on-1-ma postposition-on-1-ma postposition-on-1-ma postposition-on-1-ma postposition-on-1-ma NA
3 be afraid of NA NA NA postposition with-3-ga NA NA NA
3 be afraid of NA NA NA NA NA NA eye-nya
3 be afraid of NA palm of hand-degere NA NA NA NA NA
3 be afraid of NA NA postposition by-ba NA NA NA NA
3 be afraid of postposition-for-1-gon NA NA NA NA NA NA
3 be afraid of NA NA NA postposition-for-2-len NA NA NA
3 be afraid of NA NA NA postposition-on-1-ma postposition-on-1-ma postposition-on-1-ma NA
5 be sufficient to NA NA NA NA arm-2-je arm-2-je NA
5 be sufficient to NA NA NA NA hand hand NA
6 be like sb NA NA NA postposition by-ba NA NA NA
6 be like sb NA NA postposition with-3-ga NA NA NA NA
6 be like sb postposition-on-1-ma postposition-on-1-ma NA NA postposition-on-1-ma postposition-on-1-ma NA
6 be like sb NA NA NA NA NA NA side-3-fe
7 have trust before-dhie NA NA NA NA NA NA
7 have trust NA NA NA NA NA NA postposition at-la
7 have trust NA postposition-for-2-len NA NA NA NA NA
7 have trust NA NA NA postposition-on-1-ma NA postposition-on-1-ma NA
7 have trust NA NA NA surface-1-ta surface-1-ta surface-1-ta NA
9 see what postposition-on-1-ma NA NA NA NA NA NA
10 influence NA surface-1-ta NA NA NA NA NA
11 meet NA postposition with-2-yan NA NA NA NA NA
11 meet NA NA NA NA postposition with-3-ga postposition with-3-ga NA
11 meet postposition-on-1-ma NA NA postposition-on-1-ma NA NA NA
13 win NA postposition-for-2-len NA NA NA NA NA
13 win postposition-on-1-ma NA NA postposition-on-1-ma NA postposition-on-1-ma NA
13 win NA NA NA NA NA NA postposition-on-2-xan
14 leave NA NA NA NA NA NA belly-2-xono
14 leave NA NA NA NA NA NA stomach
15 chase NA postposition with-2-yan NA NA NA NA NA
15 chase NA NA NA postposition with-3-ga postposition with-3-ga postposition with-3-ga NA
17 say NA NA NA NA NA NA postposition with-2-yan
17 say postposition-for-2-len postposition-for-2-len NA postposition-for-2-len NA NA NA
17 say NA NA postposition-on-1-ma NA postposition-on-1-ma postposition-on-1-ma NA
18 hold NA NA NA NA arm-2-je arm-2-je NA
18 hold NA NA NA NA hand hand NA
18 hold NA NA NA postposition-on-1-ma NA NA NA
18 hold NA surface-1-ta NA NA NA NA NA
19 catch NA NA NA NA NA NA postposition at-la
19 catch NA postposition-on-1-ma NA NA NA NA NA
21 reach NA NA NA NA NA NA postposition at-la
21 reach NA postposition-for-2-len NA NA NA NA NA
21 reach NA NA NA NA postposition-on-1-ma postposition-on-1-ma NA
21 reach NA NA NA surface-1-ta NA NA NA
22 touch NA NA NA NA NA NA postposition at-la
22 touch NA postposition-on-1-ma postposition-on-1-ma postposition-on-1-ma postposition-on-1-ma postposition-on-1-ma NA
23 fight sb NA postposition by-ba NA NA NA NA NA
23 fight sb postposition with-3-ga NA postposition with-3-ga NA NA NA NA
24 be friends with NA NA NA NA NA back-1-pulu NA
24 be friends with NA postposition with-2-yan NA NA NA NA NA
24 be friends with NA NA postposition with-3-ga NA postposition with-3-ga postposition with-3-ga NA
24 be friends with NA NA NA side-3-fe NA NA NA
25 think about sb NA NA NA NA NA NA postposition at-la
25 think about sb postposition in-3-gi NA NA NA NA NA NA
25 think about sb NA postposition-for-2-len NA NA NA NA NA
25 think about sb NA NA postposition-on-1-ma postposition-on-1-ma postposition-on-1-ma postposition-on-1-ma NA
25 think about sb NA NA NA surface-1-ta NA NA NA
28 wait postposition-for-1-gon NA NA NA NA NA NA
29 forget NA NA NA NA NA NA back-3-xo
29 forget postposition with-3-ga NA NA NA NA NA NA
29 forget NA postposition-on-1-ma postposition-on-1-ma postposition-on-1-ma postposition-on-1-ma postposition-on-1-ma NA
30 depend on NA NA NA NA NA NA postposition at-la
30 depend on NA postposition by-ba NA NA NA NA NA
30 depend on postposition-on-1-ma NA NA NA NA NA NA
35 avoid postposition with-3-ga NA NA NA NA NA NA
35 avoid NA postposition-on-1-ma NA postposition-on-1-ma postposition-on-1-ma postposition-on-1-ma NA
37 mock NA NA postposition in-2-sun NA NA NA NA
37 mock postposition-on-1-ma NA NA postposition-on-1-ma postposition-on-1-ma postposition-on-1-ma NA
38 have NA NA NA NA arm-2-je arm-2-je NA
38 have NA NA NA NA hand hand NA
38 have NA palm of hand-degere NA NA NA NA NA
38 have postposition-for-1-gon NA NA NA NA NA NA
38 have NA NA NA NA NA NA side-3-fe
39 seek NA NA NA postposition with-3-ga NA NA NA
40 paint NA postposition-on-1-ma NA NA NA postposition-on-1-ma NA
41 bite NA postposition-on-1-ma NA NA NA NA NA
42 be deprived NA NA NA postposition with-3-ga NA NA NA
42 be deprived NA NA NA NA NA NA postposition at-la
42 be deprived NA NA NA NA postposition in-2-sun postposition in-2-sun NA
42 be deprived NA NA NA postposition in-3-gi NA NA NA
42 be deprived NA NA postposition-on-1-ma NA NA NA NA
46 love NA NA NA postposition with-3-ga NA NA NA
46 love NA NA postposition-for-3-be NA NA NA NA
46 love NA postposition-on-1-ma NA postposition-on-1-ma postposition-on-1-ma postposition-on-1-ma NA
46 love NA NA NA NA NA NA side-3-fe
48 dream NA NA NA postposition with-3-ga postposition with-3-ga postposition with-3-ga NA
50 put on NA postposition-on-1-ma NA NA NA NA NA
51 call smth NA NA postposition with-3-ga NA postposition with-3-ga postposition with-3-ga NA
51 call smth NA NA NA postposition-for-2-len NA NA NA
52 punish NA postposition in-3-gi NA NA NA NA NA
52 punish NA NA postposition-on-1-ma NA NA NA NA
52 punish surface-1-ta NA NA NA NA NA NA
53 attack NA NA NA postposition-on-1-ma postposition-on-1-ma postposition-on-1-ma postposition-on-1-ma
53 attack NA NA NA NA NA NA postposition-on-2-xan
53 attack surface-1-ta surface-1-ta NA NA NA NA NA
54 be filled with smth NA NA NA NA NA NA postposition at-la
54 be filled with smth NA postposition with-2-yan NA NA NA NA NA
54 be filled with smth postposition with-3-ga NA postposition with-3-ga postposition with-3-ga postposition with-3-ga postposition with-3-ga NA
55 find smth postposition-on-1-ma NA NA NA NA NA NA
56 lack NA NA NA 0 NA NA NA
56 lack NA NA NA postposition with-3-ga NA NA NA
57 hate, detest NA NA NA postposition in-3-gi NA NA NA
57 hate, detest postposition-for-1-gon NA NA NA NA NA NA
57 hate, detest NA postposition-on-1-ma NA postposition-on-1-ma postposition-on-1-ma postposition-on-1-ma NA
58 like NA NA NA side-3-fe NA NA NA
58 like NA NA NA 0 NA NA NA
58 like NA NA NA NA NA NA postposition with-2-yan
58 like NA postposition-for-2-len NA postposition-for-2-len NA NA NA
58 like NA NA postposition-for-3-be NA NA NA NA
58 like NA NA NA NA postposition-on-1-ma NA NA
59 need NA NA NA NA NA NA postposition at-la
59 need NA NA postposition by-ba NA NA NA NA
59 need postposition-on-1-ma NA NA postposition-on-1-ma postposition-on-1-ma postposition-on-1-ma NA
60 surround around-zi around-zi NA NA NA NA NA
60 surround NA NA postposition-on-1-ma postposition-on-1-ma postposition-on-1-ma NA NA
61 be left NA NA NA NA arm-2-je arm-2-je NA
61 be left NA NA NA NA hand hand NA
61 be left NA palm of hand-degere NA NA NA NA NA
62 respond NA NA NA NA postposition-on-1-ma NA NA
64 be different NA NA NA NA NA NA postposition at-la
64 be different postposition-on-1-ma postposition-on-1-ma NA postposition-on-1-ma postposition-on-1-ma postposition-on-1-ma NA
65 fall behind NA NA back-1-pulu NA back-1-pulu back-1-pulu NA
65 fall behind NA NA NA NA NA NA back-3-xo
65 fall behind NA bottom-1-ju NA NA NA NA NA
65 fall behind NA NA NA side-3-fe NA NA NA
67 smell NA postposition with-2-yan NA NA NA NA NA
67 smell NA NA postposition-for-3-be NA NA NA NA
67 smell postposition-on-1-ma NA NA postposition-on-1-ma postposition-on-1-ma postposition-on-1-ma NA
67 smell NA NA NA NA NA NA trace-1-pon
68 traverse, cross NA NA NA surface-1-ta NA NA NA
73 approach NA around-zi NA NA NA NA NA
73 approach postposition-on-1-ma NA NA NA NA NA postposition-on-1-ma
74 leave NA NA NA NA NA NA postposition at-la
75 75) cover surface-1-ta surface-1-ta NA NA NA NA NA
75 75) cover postposition-on-1-ma postposition-on-1-ma NA NA NA NA NA
75 75) cover NA surface-1-ta NA surface-1-ta NA NA NA
76 76) remember NA NA postposition in-2-sun NA NA NA NA
76 76) remember NA NA NA NA NA NA postposition at-la
76 76) remember NA postposition in-3-gi NA NA NA NA NA
76 76) remember postposition with-3-ga NA NA postposition with-3-ga NA NA NA
76 76) remember NA NA postposition-on-1-ma NA postposition-on-1-ma NA NA
76 76) remember NA NA NA surface-1-ta NA NA NA
76 76) remember NA NA NA NA NA surface-2-ga NA
77 77) help NA postposition by-ba postposition by-ba NA NA NA NA
77 77) help NA NA NA postposition-on-1-ma NA postposition-on-1-ma NA
77 77) help NA NA NA NA trace-1-pon NA NA
79 79) fall into NA NA NA NA NA NA postposition-on-2-xan
80 80) cut with NA postposition with-2-yan NA NA NA NA NA
80 80) cut with postposition with-3-ga NA postposition with-3-ga postposition with-3-ga postposition with-3-ga postposition with-3-ga NA
81 81) stick to smth NA NA postposition by-ba NA NA NA NA
81 81) stick to smth postposition-on-1-ma NA NA postposition-on-1-ma postposition-on-1-ma postposition-on-1-ma NA
81 81) stick to smth NA surface-1-ta NA NA NA NA NA
82 82) lose to smb NA NA NA NA NA NA arm-1-bolo
82 82) lose to smb NA NA NA NA NA NA hand
82 82) lose to smb NA NA NA postposition-on-1-ma postposition-on-1-ma postposition-on-1-ma NA
83 83) be happy about, se rejouir de NA postposition by-ba NA NA NA NA NA
83 83) be happy about, se rejouir de NA NA NA NA NA NA postposition at-la
83 83) be happy about, se rejouir de NA postposition-on-1-ma postposition-on-1-ma NA postposition-on-1-ma postposition-on-1-ma NA
84 84) speak with NA NA NA side-3-fe NA NA NA
84 84) speak with postposition-for-2-len NA NA postposition-for-2-len NA NA NA
84 84) speak with NA NA NA NA NA NA side-3-fe
85 85) give birth to NA NA NA postposition with-3-ga postposition with-3-ga postposition with-3-ga NA
86 86) let fall smth NA NA NA NA NA NA arm-1-bolo
86 86) let fall smth NA NA NA NA NA NA hand
87 87) diriger, gouverner before-dhie NA NA NA NA NA NA
87 87) diriger, gouverner NA NA NA NA NA NA postposition with-2-yan
88 88) miss postposition with-3-ga NA NA NA NA NA NA
88 88) miss NA NA NA NA NA NA postposition at-la
88 88) miss surface-1-ta NA NA NA NA NA NA
89 89) follow NA NA back-1-pulu NA NA back-1-pulu NA
89 89) follow NA bottom-1-ju NA NA NA NA NA
89 89) follow occiput-3-kee NA NA NA NA NA NA
89 89) follow NA NA NA NA NA NA postposition at-la
90 90) climb down NA NA NA NA NA NA postposition at-la
91 91) listen to smth postposition-on-1-ma NA postposition-on-1-ma postposition-on-1-ma NA postposition-on-1-ma NA
91 91) listen to smth NA NA NA NA NA NA side-3-fe
92 92) listen to smb NA NA NA NA NA NA postposition with-2-yan
92 92) listen to smb NA NA postposition-for-3-be NA NA NA NA
93 93) hear NA NA postposition-on-1-ma NA NA NA NA
94 94) mix up with smth NA postposition with-2-yan NA NA NA NA NA
94 94) mix up with smth postposition with-3-ga NA NA postposition with-3-ga postposition with-3-ga postposition with-3-ga NA
97 97) appear in a dream NA NA NA NA NA NA postposition at-la
97 97) appear in a dream postposition with-3-ga NA NA postposition with-3-ga postposition with-3-ga postposition with-3-ga NA
98 98) agree NA NA NA postposition by-ba NA NA NA
98 98) agree NA NA NA NA postposition in-2-sun NA NA
98 98) agree postposition-on-1-ma NA postposition-on-1-ma NA NA NA postposition-on-1-ma
99 99) dispute NA postposition by-ba NA NA NA NA NA
99 99) dispute NA postposition with-2-yan NA NA NA NA NA
99 99) dispute postposition with-3-ga NA NA NA NA NA NA
99 99) dispute NA NA NA postposition-on-1-ma NA NA NA
100 100) cost NA NA NA NA NA NA postposition with-2-yan
100 100) cost NA NA postposition with-3-ga postposition with-3-ga postposition with-3-ga postposition with-3-ga NA
101 101) shoot at NA postposition by-ba NA NA NA NA NA
101 101) shoot at postposition-on-1-ma NA postposition-on-1-ma postposition-on-1-ma NA postposition-on-1-ma NA
103 103) lose NA NA NA NA NA NA back-3-xo
104 104) drown NA NA bottom-2-bu NA NA NA NA
104 104) drown NA NA NA NA NA NA postposition at-la
104 104) drown postposition by-ba NA NA postposition by-ba NA NA NA
104 104) drown NA NA NA NA postposition in-2-sun postposition in-2-sun NA
107 107) kiss NA postposition-on-1-ma NA NA NA NA NA
110 110) respect postposition in-3-gi NA NA NA NA NA NA
110 110) respect NA NA postposition-on-1-ma NA NA NA NA
111 111) disdain NA NA NA NA NA NA postposition at-la
111 111) disdain postposition in-3-gi NA NA NA NA NA NA
111 111) disdain NA postposition-on-1-ma NA postposition-on-1-ma postposition-on-1-ma NA NA
111 111) disdain NA NA NA side-3-fe NA NA NA
111 111) disdain NA NA NA NA NA trace-1-pon NA
112 112) be happy about NA NA NA 0 NA NA NA
112 112) be happy about NA NA NA NA NA NA postposition at-la
112 112) be happy about NA NA NA NA NA NA postposition with-2-yan
112 112) be happy about NA NA postposition-on-1-ma NA NA NA NA
112 112) be happy about NA NA NA side-3-fe NA NA NA
112 112) be happy about NA NA NA NA trace-1-pon trace-1-pon NA
113 113) fall in love with NA NA NA NA NA NA postposition at-la
113 113) fall in love with postposition in-3-gi NA NA NA NA NA NA
113 113) fall in love with NA NA postposition with-3-ga NA NA NA NA
113 113) fall in love with NA NA postposition-on-1-ma postposition-on-1-ma postposition-on-1-ma postposition-on-1-ma NA
114 114) trust in sb NA NA NA NA NA NA postposition at-la
114 114) trust in sb NA surface-1-ta NA NA surface-1-ta NA NA
115 115) have pity of NA NA NA NA NA NA postposition at-la
115 115) have pity of NA NA NA postposition-on-1-ma NA postposition-on-1-ma NA
116 116) be envious of postposition with-3-ga NA NA NA NA NA NA
116 116) be envious of NA postposition-on-1-ma NA postposition-on-1-ma NA postposition-on-1-ma NA
116 116) be envious of NA NA NA NA trace-1-pon NA NA
117 117) be angry with NA postposition by-ba NA NA NA NA NA
117 117) be angry with NA NA postposition-on-1-ma postposition-on-1-ma postposition-on-1-ma NA NA
117 117) be angry with side-3-fe NA NA NA NA NA NA
117 117) be angry with NA NA NA NA NA trace-1-pon NA
117 117) be angry with NA NA NA NA NA NA under-1-kodo
118 118) be surprised about NA NA NA NA NA NA postposition at-la
118 118) be surprised about postposition with-3-ga NA postposition with-3-ga postposition with-3-ga NA NA NA
118 118) be surprised about NA postposition-on-1-ma NA NA NA postposition-on-1-ma NA
119 119) love smth NA NA NA NA NA NA postposition with-2-yan
119 119) love smth NA NA NA postposition with-3-ga NA NA NA
119 119) love smth NA postposition-for-2-len NA NA NA NA NA
119 119) love smth NA NA NA NA postposition-on-1-ma postposition-on-1-ma NA
120 120) enjoy/ take pleasure in NA postposition with-2-yan NA NA NA NA NA
120 120) enjoy/ take pleasure in NA NA postposition with-3-ga NA NA NA NA
121 121) want NA postposition by-ba NA NA NA NA NA
121 121) want NA NA NA postposition with-3-ga NA NA NA
121 121) want NA NA postposition-on-1-ma NA NA NA NA
121 121) want side-3-fe NA NA side-3-fe NA NA side-3-fe
121 121) want NA NA NA NA trace-1-pon trace-1-pon NA
122 122) be angry with = 117? NA postposition by-ba NA NA NA NA NA
122 122) be angry with = 117? NA NA NA NA trace-1-pon NA NA
123 123) take offense by sb before-dhie NA NA NA NA NA NA
123 123) take offense by sb NA NA NA NA postposition in-2-sun NA NA
123 123) take offense by sb NA NA NA NA NA postposition with-3-ga NA
123 123) take offense by sb NA postposition-on-1-ma NA NA NA NA NA
124 124) make sad sb NA NA NA NA NA NA postposition at-la
124 124) make sad sb NA NA postposition-on-1-ma NA NA NA NA
124 124) make sad sb NA NA NA NA trace-1-pon NA NA
125 125) be surprised by NA postposition-on-1-ma NA NA postposition-on-1-ma NA NA
126 126) despise sb NA NA NA NA postposition-on-1-ma NA postposition-on-1-ma
127 127) be sad because of sb NA NA postposition by-ba NA NA NA NA
127 127) be sad because of sb NA NA NA postposition with-3-ga NA NA NA
127 127) be sad because of sb NA postposition-on-1-ma NA NA postposition-on-1-ma NA NA
127 127) be sad because of sb NA NA NA NA NA NA side-3-fe
128 128) be annoyed by sb NA NA NA mouth-1-da NA NA NA
128 128) be annoyed by sb postposition-for-2-len NA NA NA NA NA NA
128 128) be annoyed by sb NA postposition-on-1-ma NA NA NA NA NA
128 128) be annoyed by sb NA NA NA NA trace-1-pon NA NA
129 129) sympathise to sb NA NA NA NA NA NA postposition-on-1-ma
130 130) be embarrassed by sb NA NA NA 0 NA NA NA
130 130) be embarrassed by sb NA NA NA postposition with-3-ga NA NA NA
130 130) be embarrassed by sb NA NA postposition-on-1-ma NA NA postposition-on-1-ma NA

Если из двух компонентов совпал один, мы ставим 0.5; если из трёх совпал один, то 0.33; если из трёх совпали два, то 0.66. Если у языка две конструкции, то учитываем максимальное совпадение.

Code
# Read the verb-meaning table and reshape it into `analyze_df`: one row per
# component of a multi-part verb meaning, with columns number, language, form.
complex_verbs <- readxl::read_xlsx("../GM_merged_wide_verb_meaning.xlsx")

analyze_df <- complex_verbs |> 
  pivot_longer(cols = Guro:Kono,
               names_to = "language",
               values_to = "verb_meaning") |> 
  filter(!is.na(verb_meaning)) |> 
  select(number, id, language, verb_meaning) |> 
  # Remove parenthesised remarks, normalise whitespace, then split the
  # alternatives that are separated by " ; ".
  mutate(verb_meaning = verb_meaning |> 
           str_remove_all("\\(.*?\\)") |> 
           str_squish() |> 
           str_split(" ; ")) |>
  unnest_longer(verb_meaning) |> 
  # Up to four space-separated components per meaning, filled from the left.
  separate_wider_delim(verb_meaning, delim = " ",
                       names = c("p1", "p2", "p3", "p4"),
                       too_few = "align_start") |> 
  # Only meanings with at least two components are kept.
  filter(!is.na(p2)) |> 
  # p2 is known to be present here, so the count is 4, 3 or otherwise 2.
  mutate(n_parts = case_when(!is.na(p4) ~ 4,
                             !is.na(p3) ~ 3,
                             .default = 2)) |> 
  pivot_longer(cols = p1:p4, names_to = "p", values_to = "form") |> 
  # Components containing "?" are blanked and dropped together with the
  # unused component slots by na.omit().
  mutate(form = if_else(str_detect(form, "\\?{1,3}"), NA_character_, form)) |> 
  na.omit() |> 
  arrange(form, number, id) |> 
  # The number of parts is appended to the language, the id to the stimulus number.
  transmute(number = str_c(number, "_", id),
            language = str_c(language, "_", n_parts),
            form) |> 
  arrange(number, language, form)

# Per-stimulus similarity between languages: the number of shared components
# divided by the larger number of parts (2, 3 or 4) of the two constructions.
# `language` in `analyze_df` carries the number of parts as a "_N" suffix.
result <- sort(unique(analyze_df$number)) |> 
  map(function(i) {
    analyze_df |> 
      filter(number == i) |> 
      pairwise_count(language, form) |> 
      # The part count is read from the "_N" suffix only, so a digit elsewhere
      # in a language name could not be mistaken for it.
      mutate(total = case_when(str_detect(item1, "_4$") | str_detect(item2, "_4$") ~ 4,
                               str_detect(item1, "_3$") | str_detect(item2, "_3$") ~ 3, 
                               TRUE ~ 2),
             value = n / total,
             number = i,
             item1 = str_remove(item1, "_\\d$"),
             item2 = str_remove(item2, "_\\d$")) |> 
      select(number, item1, item2, value, total) |> 
      # Pairs of two constructions of the same language are not comparisons.
      filter(item1 != item2) |> 
      # A language with several constructions for this stimulus yields several
      # rows for the same language pair; keep only the best match so that the
      # later sum over stimuli counts each pair once per stimulus.
      slice_max(value, n = 1, with_ties = FALSE, by = c(item1, item2))
  }) |> 
  list_rbind()

# Add up `value` and `total` over all stimuli for each (item1, item2) pair and
# show the summed value as a labelled heatmap.
result |> 
  group_by(item1, item2) |> 
  summarise(across(c(value, total), sum)) |> 
  ggplot(aes(x = item1, y = item2)) +
  geom_tile(aes(fill = value), colour = "white") +
  geom_text(aes(label = round(value, 3)), colour = "white") +
  labs(x = "", y = "", title = "Heatmap with all stimuli") +
  scale_fill_gradient(low = "lightblue", high = "navy") +
  coord_fixed() +
  theme(legend.position = "bottom")

Чем выше значение, тем больше сходство. У меня есть подозрение, что я где-то налажал (ну и я сейчас выкинул все, что я подозревал в ошибке), но оно говорит что-то не совсем дикое: огромное сходство кпелле и коно, большое сходство кпелле и мано… Чудеса…

Я преобразую все вот в такую таблицу. К language я привинчиваю количество частей. Так что, например, первые две строчки нужно читать так: в стимуле номер 1 в языке Dan_Gweetaa 2 части break и interior. Так как таблица отсортирована по номеру стимула, то больше сложных глаголов в первом стимуле нет.

Я сделал некоторый хак, который позволяет делать кластеризацию. Я завожу признаки, которые состоят из фрагмента значения и номера, в котором он встречается:

Code
# Wide 0/1 table of features by language. A feature is a meaning component
# joined with the stimulus identifier it occurs in; the part-count suffix is
# stripped from the language name first.
analyze_df |> 
  distinct(language = str_remove(language, "_\\d"),
           form = str_c(form, "_", number)) |> 
  mutate(value = 1) |> 
  pivot_wider(names_from = language,
              values_from = value,
              values_fill = 0)
form Bamana Dan_Gweetaa Guro Kpelle Looma Mano Kono
hit_101_1 1 0 0 0 0 0 0
powder_101_1 1 0 0 0 0 0 0
go.up_101_1 0 1 0 0 0 0 0
gun_101_1 0 1 0 1 1 0 0
bow_101_1 0 0 1 0 0 0 0
throw_101_1 0 0 1 0 0 0 0
fall_101_1 0 0 0 1 0 0 0
drop_101_1 0 0 0 0 1 1 0
fusil_101_1 0 0 0 0 0 1 0
implement_102_1 0 0 0 0 0 0 1
mouth_107_1 0 0 0 0 1 0 0
touch_107_1 0 0 0 0 1 0 0
enter_107_2 0 0 0 0 0 0 1
mouth_107_2 0 0 0 0 0 0 1
mouth_107_3 1 0 0 0 0 0 0
smell_107_3 1 0 0 0 0 0 0
say_108_1 0 1 0 0 0 0 0
voice_108_1 0 1 0 0 0 0 0
dire_108_1 0 0 1 0 0 0 0
surface_108_1 0 0 1 0 0 0 0
book_108_2 0 0 0 1 0 0 0
implement_108_2 0 0 0 1 0 0 0
implement_109_1 0 0 0 0 0 1 0
mouvement_109_1 0 0 0 0 0 1 0
put_on_10_1 0 0 1 0 0 0 0
work_10_1 0 0 1 0 0 0 0
surface_10_1 0 0 1 0 0 0 0
mouth_10_2 1 0 0 0 0 0 0
press_10_2 1 0 0 0 0 0 0
give_110_1 0 0 0 0 1 1 0
head_110_1 0 0 0 0 1 0 0
respect_110_1 0 0 0 0 0 1 0
be_heavy_110_2 0 0 0 1 0 0 1
prev_110_2 0 0 0 1 0 1 1
respect_110_2 0 0 0 0 0 1 0
sit_110_2 0 0 0 0 0 1 0
do_110_2 0 0 0 0 0 1 0
heavy_110_2 0 0 0 0 0 1 0
be_110_3 0 1 0 0 0 0 0
respect_110_3 0 1 0 0 0 0 0
accompany_111_1 0 0 1 0 0 0 0
implement_111_1 0 0 0 0 0 1 0
neglect_111_1 0 0 0 0 0 1 0
drop_111_2 0 0 0 0 0 1 0
eye_111_2 0 0 0 0 0 1 0
respect_111_2 0 0 0 0 0 1 0
spoil_111_2 0 0 0 0 0 1 0
be.neg_111_3 0 0 0 0 0 0 1
love_111_3 0 0 0 0 0 0 1
appear_111_4 0 1 0 0 0 0 0
disgust_111_4 0 1 0 0 0 0 0
chase_111_5 0 1 0 0 0 0 0
disgust_111_5 0 1 0 0 0 0 0
eye_112_1 1 0 0 0 0 0 0
fill_112_1 1 0 0 0 0 0 0
be_nice_112_2 0 0 0 0 1 0 1
stomach.loc_112_2 0 0 0 0 1 0 1
be_good_112_2 0 0 0 1 0 0 0
stomach_112_2 0 0 0 1 0 1 0
become_beautiful_112_2 0 0 0 0 0 1 0
go_up_113_1 0 1 0 0 0 0 0
love_113_1 0 1 0 1 0 1 1
fall_113_1 0 0 0 0 0 0 1
drop_113_1 0 0 0 1 0 1 0
do_115_1 0 0 0 0 0 1 0
pity_115_1 0 0 0 0 0 1 0
see_115_2 0 0 0 0 0 0 1
catch_115_2 0 0 0 0 1 0 0
pain_115_2 0 0 0 0 1 0 0
surface_115_2 0 0 0 0 1 0 0
do_115_2 0 0 0 0 0 1 0
pity_115_2 0 0 0 0 0 1 0
do_115_3 0 1 0 0 0 0 0
pity_115_3 0 1 0 0 0 0 0
be_115_4 0 0 0 1 0 0 0
eye_115_4 0 0 0 1 0 0 0
prev-love_115_4 0 0 0 1 0 0 0
do_116_1 0 1 0 0 0 0 0
jealousy_116_1 0 1 0 0 0 0 0
jelousy_116_1 0 0 1 0 0 0 0
wear_116_1 0 0 1 0 0 0 0
cut_116_2 0 0 0 0 0 1 0
heart_116_2 0 0 0 0 0 1 0
anger_117_1 0 1 0 0 0 0 0
take_117_1 0 1 0 0 0 0 0
heart_117_2 0 0 0 0 0 0 1
sting_117_2 0 0 0 0 0 0 1
attach_117_2 0 0 0 0 0 1 0
face_117_2 0 0 0 0 0 1 0
ball_117_2 0 0 0 0 0 1 0
boil_117_3 0 0 1 0 0 0 0
heart_117_3 0 0 1 1 1 0 0
sting_117_3 0 0 0 1 0 0 0
be_bitter_117_3 0 0 0 0 1 0 0
envelop_118_1 0 0 1 0 0 0 0
interior_118_1 0 0 1 0 0 0 0
on.it_118_1 0 0 0 1 0 0 0
stomach.loc_118_1 0 0 0 1 0 0 0
throw_118_1 0 0 0 1 0 1 0
be_dry_118_1 0 0 0 0 1 0 0
mouth_118_1 0 0 0 0 1 0 0
stomach_118_1 0 0 0 0 0 1 0
do_118_2 0 1 0 0 0 0 0
matter_118_2 0 1 0 0 0 0 0
surprise_118_2 0 1 0 0 0 0 0
with_118_2 0 1 0 0 0 0 0
envelop_118_3 0 0 1 0 0 0 0
interior_118_3 0 0 1 0 0 0 0
envelop_118_4 0 0 1 0 0 0 0
interior_118_4 0 0 1 0 0 0 0
do_119_1 0 1 0 0 0 0 0
love_119_1 0 1 0 0 0 0 0
be_119_2 0 0 0 1 0 0 1
love_119_2 0 0 0 1 0 0 1
profit_120_1 1 0 0 0 0 0 0
remove_120_1 1 0 0 0 0 0 0
see_120_1 0 1 0 0 0 0 0
taste_120_1 0 1 0 0 0 1 0
take_120_1 0 0 0 0 0 1 0
become_happy_120_2 0 0 1 0 0 0 0
interior_120_2 0 0 1 0 0 0 0
pleasure_120_2 0 0 0 0 1 0 0
remove_120_2 0 0 0 0 1 0 0
be_good_120_3 0 0 0 1 0 0 0
stomach_120_3 0 0 0 1 0 0 0
become_happy_121_1 0 0 1 0 0 0 0
interior_121_1 0 0 1 0 0 0 0
do_121_2 0 1 0 0 0 0 0
love_121_2 0 1 0 0 0 0 0
heart_122_1 0 0 0 0 0 0 1
sting_122_1 0 0 0 0 0 0 1
boil_122_2 0 0 1 0 0 0 0
heart_122_2 0 0 1 0 0 0 0
implement_123_1 0 0 0 0 0 0 1
passage_123_1 0 0 0 0 0 0 1
interior_123_2 0 0 0 0 0 1 0
search_123_2 0 0 0 0 0 1 0
speech_123_2 0 0 0 0 0 1 0
aggravate_123_3 0 0 1 0 0 0 0
speech_123_3 0 0 1 0 0 0 0
finish_124_1 0 0 0 0 0 0 1
enter_124_2 1 0 0 0 0 0 0
guilt_124_2 1 0 0 0 0 0 0
heart_124_3 0 1 0 0 0 0 0
wound_124_3 0 1 0 0 0 0 0
face_124_3 0 0 0 1 0 0 0
kill_124_3 0 0 0 1 0 0 0
pass_124_3 0 0 0 0 1 0 0
do_124_4 0 0 0 0 0 1 0
pitoyable_124_4 0 0 0 0 0 1 0
pain_124_5 0 0 0 0 1 0 0
remove_124_5 0 0 0 0 1 0 0
on.it_125_1 0 0 0 0 0 0 1
stomach.loc_125_1 0 0 0 0 0 0 1
throw_125_1 0 0 0 0 0 0 1
envelop_125_2 0 0 1 0 0 0 0
interior_125_2 0 0 1 0 0 0 0
envelop_125_3 0 0 1 0 0 0 0
interior_125_3 0 0 1 0 0 0 0
end_125_4 1 0 0 0 0 0 0
means_125_4 1 0 0 0 0 0 0
envelop_125_5 0 0 1 0 0 0 0
interior_125_5 0 0 1 0 0 0 0
implement_126_1 0 0 0 1 0 0 1
insult_126_1 0 0 0 0 0 0 1
pass_126_1 0 0 0 1 0 0 0
do_126_2 0 1 0 0 0 0 0
scorn_126_2 0 1 0 0 0 0 0
install_126_2 0 0 0 0 0 1 0
kill_126_2 0 0 0 0 0 1 0
cry_127_1 1 0 0 0 0 0 0
heart_127_1 1 0 0 0 0 0 0
pass_127_1 0 0 0 0 1 0 0
be_nice_127_2 0 0 0 0 0 0 1
stomach.loc_127_2 0 0 0 0 0 0 1
fire_127_3 0 0 1 0 0 0 0
install_127_3 0 0 1 0 0 0 0
interior_127_3 0 0 1 0 0 0 0
body_127_4 0 0 0 0 0 1 0
break_127_4 0 0 0 0 0 1 0
fill_127_5 0 0 0 0 0 1 0
mouth_127_5 0 0 0 0 0 1 0
heart_128_1 0 0 0 0 0 0 1
sting_128_1 0 0 0 0 0 0 1
boil_128_2 0 0 1 0 0 0 0
heart_128_2 0 0 1 0 0 0 0
implement_129_1 0 0 0 0 0 0 1
sit_129_1 0 0 0 0 0 0 1
cold_130_1 0 0 0 0 1 0 0
heart_130_1 0 0 0 0 1 0 0
inside_130_2 0 1 0 0 0 0 0
obey_130_2 0 1 0 0 0 0 0
bone_13_1 0 1 0 0 0 0 0
take_13_1 0 1 0 0 0 0 0
gain_13_2 1 0 0 0 0 0 0
power_13_2 1 0 0 0 0 0 0
do_13_3 0 1 0 0 0 0 0
head_13_3 0 1 0 0 0 0 0
hand_13_4 0 0 0 1 0 1 0
raise_13_4 0 0 0 1 0 0 0
be_able_13_4 0 0 0 0 0 1 0
prev_16_1 0 0 0 1 0 0 0
stretch_16_1 0 0 0 1 0 0 0
be_18_1 0 0 0 0 0 1 0
hand_18_1 0 0 0 0 0 1 0
break_1_1 0 1 0 0 0 0 0
interior_1_1 0 1 0 0 0 0 0
breast_20_1 0 0 0 1 1 0 1
milk_20_1 0 0 0 1 0 1 1
remove_20_1 0 0 0 1 1 1 1
cold_20_1 0 0 0 0 1 0 0
gain_21_1 0 0 0 1 0 0 0
prev_21_1 0 0 0 1 0 0 0
implement_21_1 0 0 0 0 0 1 0
hand_22_1 0 0 0 1 0 1 0
sit_22_1 0 0 0 1 0 0 0
stretch_22_1 0 0 0 1 0 0 0
touch_22_1 0 0 0 0 0 1 0
fight_23_1 0 1 1 0 0 0 0
wage_23_1 0 1 0 0 0 0 0
sell_23_1 0 0 1 0 0 0 0
battle_23_2 0 0 0 1 0 1 0
fight_23_2 0 0 0 1 0 1 0
do_24_1 0 0 0 0 0 1 0
friendship_24_1 0 0 0 0 0 1 0
be_24_2 0 0 1 0 0 0 0
friend_24_2 0 0 1 0 0 0 0
do_25_1 0 1 0 0 0 0 0
time_25_1 0 1 0 0 0 0 0
mind_25_1 0 0 0 0 1 0 0
sit_25_1 0 0 0 0 1 0 0
interior_25_2 0 0 1 0 0 0 0
remember_25_2 0 0 1 0 0 0 0
stomach_25_2 0 0 1 0 0 0 0
spirit_25_2 0 0 0 1 0 1 1
surface_25_2 0 0 0 1 0 0 1
walk_25_2 0 0 0 1 0 1 1
in_front_of_25_2 0 0 0 0 0 1 0
come_25_3 0 0 0 0 0 1 0
memory_25_3 0 0 0 0 0 1 0
eye_28_1 0 1 0 0 0 0 0
leave_28_1 0 1 0 0 0 0 0
in_front_of_28_2 0 0 1 0 0 0 0
wait_28_2 0 0 1 0 0 0 0
prev_28_2 0 0 0 1 0 1 1
unite_28_2 0 0 0 1 0 1 1
appear_29_1 0 1 0 0 0 0 0
heart_29_1 0 1 0 0 0 1 0
be_mistaken_29_1 0 0 1 0 0 0 0
interior_29_1 0 0 1 0 0 0 0
forget_29_1 0 0 0 0 0 1 0
call_31_1 0 0 0 0 0 1 0
do_31_1 0 0 0 0 0 1 0
implement_35_1 0 0 0 1 0 0 0
back_35_1 0 0 0 0 0 1 0
put_in_35_1 0 0 0 0 0 1 0
lie_35_2 0 0 0 1 1 0 0
prev_35_2 0 0 0 1 0 1 0
surface_35_2 0 0 0 0 1 0 0
put_on_35_2 0 0 0 0 0 1 0
do_36_1 0 1 0 0 0 1 0
thing:CSTR_36_1 0 1 0 0 0 0 0
thing_36_1 0 0 0 0 0 1 0
install_37_1 0 1 0 0 0 0 0
mockery_37_1 0 1 0 0 0 0 0
split_37_1 0 1 0 0 0 0 0
laughter_37_1 0 0 0 0 1 0 0
remove_37_1 0 0 0 0 1 0 0
mockery_37_2 1 0 0 0 0 0 0
remove_37_2 1 0 0 0 0 0 0
tithe_37_2 1 0 0 0 0 0 0
mockery_37_3 1 0 0 0 0 0 0
remove_37_3 1 0 0 0 0 0 0
tithe_37_3 1 0 0 0 0 0 0
eye_39_1 0 0 0 0 0 1 0
put_on_39_1 0 0 0 0 0 1 0
walk_39_1 0 0 0 0 0 1 0
frighten_3_1 0 0 0 0 0 1 0
prev_3_1 0 0 0 0 0 1 0
kaolin_40_1 0 0 1 0 0 0 0
spread_40_1 0 0 1 0 0 0 0
water_40_1 0 0 1 0 0 0 0
go_up_40_1 0 0 0 1 0 0 0
painting_40_1 0 0 0 1 0 0 0
go_up_40_2 0 0 0 0 0 0 1
prev_40_2 0 0 0 0 0 0 1
install_41_1 0 0 1 0 0 0 0
tooth_41_1 0 0 1 0 0 0 0
install_41_2 0 0 0 0 0 1 0
tooth_41_2 0 0 0 0 0 1 0
break_44_1 0 0 0 1 0 0 1
interior_44_1 0 0 0 1 0 0 1
do_45_1 0 1 0 0 0 0 0
falsehood_45_1 0 1 0 0 0 0 0
praise_45_1 0 0 0 0 0 0 1
prev_45_1 0 0 0 0 0 1 1
go_up_45_1 0 0 0 1 0 0 0
prev2_45_1 0 0 0 1 0 0 0
monter_45_1 0 0 0 0 0 1 0
do_46_1 0 0 0 0 0 1 0
love_46_1 0 0 0 0 0 1 0
accompany_46_2 0 0 1 0 0 0 0
heart_46_2 0 0 1 0 0 0 0
do_46_3 0 0 0 0 0 1 0
face_46_3 0 0 0 0 0 1 0
love_46_3 0 0 0 0 0 1 0
matter_46_3 0 0 0 0 0 1 0
remove_46_3 0 0 0 0 0 1 0
do_46_4 0 1 0 0 0 0 0
love_46_4 0 1 0 0 0 0 0
be_46_5 0 0 0 1 0 1 1
love_46_5 0 0 0 1 0 1 1
implement_47_1 0 0 0 0 0 1 0
movement_47_1 0 0 0 0 0 1 0
dream_48_1 0 0 0 1 0 1 1
implement_48_1 0 0 0 1 0 0 1
kill_48_1 0 0 0 0 0 1 0
interior_49_1 0 0 0 0 0 0 1
wash_49_1 0 0 0 1 0 1 1
prev_49_1 0 0 0 1 0 1 0
be_51_1 0 0 0 0 0 1 0
name_51_1 0 0 0 0 0 1 0
be_51_2 0 0 0 1 0 0 1
name_51_2 0 0 0 1 0 0 1
lie_52_1 0 1 0 0 1 0 0
matter_52_1 0 1 1 0 0 0 0
in_front_of_52_1 0 0 1 0 0 0 0
install_52_1 0 0 1 0 0 0 0
law_52_1 0 0 0 0 1 0 0
mouth_54_1 0 0 0 0 1 0 0
fill_54_2 0 0 0 1 0 1 1
mouth_54_2 0 0 0 1 0 1 1
interior_54_2 0 0 0 0 0 1 0
completeness_56_1 0 0 0 0 0 1 0
cop.NEG_56_1 0 0 0 0 0 1 0
completeness_56_2 0 0 0 0 0 1 0
cop.NEG_56_2 0 0 0 0 0 1 0
do_57_1 0 1 0 0 0 0 0
repugnance_57_1 0 1 0 0 0 0 0
take_57_2 0 0 0 0 1 0 0
boil_57_3 0 0 1 0 0 0 0
heart_57_3 0 0 1 0 0 0 0
end_57_4 0 0 0 1 0 0 0
love_57_4 0 0 0 1 0 0 0
be.neg_57_5 0 0 0 1 0 1 1
love_57_5 0 0 0 1 0 1 1
catch_58_1 0 0 0 1 0 1 0
heart_58_1 0 0 0 1 0 1 0
drop_58_1 0 0 0 0 0 1 0
catch_58_2 0 0 0 0 0 1 0
heart_58_2 0 0 0 0 0 1 0
do_58_3 0 1 0 0 0 0 0
love_58_3 0 1 0 0 0 0 0
be_58_4 0 0 0 0 0 0 1
love_58_4 0 0 0 0 0 0 1
be_59_1 0 0 0 1 0 1 1
prev-issue_59_1 0 0 0 0 0 0 1
prev-be_obliged_59_1 0 0 0 1 0 0 0
need_59_1 0 0 0 0 0 1 0
catch_59_2 0 1 0 0 0 0 0
need_59_2 0 1 0 0 0 0 0
be_59_3 0 0 0 1 0 1 0
prev-be_obliged_59_3 0 0 0 1 0 1 0
need_59_3 0 0 0 0 0 1 0
prev_60_1 0 0 0 1 0 1 0
turn_60_1 0 0 0 1 0 1 0
implement_62_1 0 0 0 0 0 0 1
back_62_2 0 0 0 1 0 0 0
prev2_62_2 0 0 0 1 0 0 0
return_62_2 0 0 0 1 0 0 0
speech_62_2 0 0 0 1 0 1 0
accept_62_2 0 0 0 0 0 1 0
interior_62_2 0 0 0 0 0 1 0
remove_62_2 0 0 0 0 0 1 0
surface_62_2 0 0 0 0 0 1 0
turn_62_2 0 0 0 0 0 1 0
answer_62_3 0 1 0 0 0 0 0
appear_62_3 0 1 0 0 0 0 0
question_62_3 0 1 0 0 0 0 0
board_63_1 1 0 0 0 0 0 0
open_63_1 1 0 0 0 0 0 0
enlever_63_1 0 0 1 0 0 0 0
in_front_of_63_1 0 0 1 0 0 0 0
implement_63_1 0 0 0 1 0 0 1
mouth_63_1 0 0 0 1 1 1 1
stand_63_1 0 0 0 0 1 0 0
remove_63_1 0 0 0 0 0 1 0
be_64_1 0 0 0 0 0 1 0
différent_64_1 0 0 0 0 0 1 0
field_66_1 0 0 0 0 0 0 1
make_66_1 0 0 0 0 0 0 1
cultivate_66_1 0 0 0 1 0 0 0
place_66_1 0 0 0 1 0 1 0
remove_66_1 0 0 0 0 0 1 0
do_66_2 0 1 0 0 0 0 0
field_66_2 0 1 0 0 0 0 0
install_67_1 0 0 1 0 0 0 0
smell_67_1 0 0 1 0 0 0 0
surface_67_1 0 0 1 0 0 0 0
be_67_2 0 0 0 1 0 1 1
smell_67_2 0 0 0 1 0 1 1
cross_68_1 0 0 0 0 0 0 1
interior_68_1 0 0 0 0 0 0 1
pass_68_1 0 0 0 0 1 0 0
surface_68_1 0 0 0 0 1 0 0
cut_68_1 0 0 0 0 0 1 0
remove_69_1 0 1 0 0 0 0 0
song_69_1 0 1 0 1 1 1 1
fall_69_1 0 0 0 1 0 0 1
drop_69_1 0 0 0 0 1 0 0
implement_69_1 0 0 0 0 0 1 0
kill_70_1 0 1 0 0 0 0 0
writing_70_1 0 1 0 0 0 0 0
drop_72_1 0 0 1 0 0 0 0
water_72_1 0 0 1 0 0 0 0
encounter_73_1 1 0 0 0 0 0 0
measure_73_1 1 0 0 0 0 0 0
catch_73_2 0 0 0 1 0 0 0
prev_73_2 0 0 0 1 0 0 0
lie_75_1 0 0 0 0 0 0 1
stretch_75_1 0 0 0 0 0 0 1
cover_75_2 1 0 0 0 0 0 0
mouth_75_2 1 0 0 0 0 0 0
in_front_of_75_2 0 0 1 0 0 0 0
unite_75_2 0 0 1 0 0 0 0
close_75_2 0 0 0 1 0 0 0
upper_surface_75_2 0 0 0 1 0 0 0
mind_76_1 0 0 0 0 1 0 0
sit_76_1 0 0 0 0 1 0 0
spirit_76_2 0 0 0 0 0 0 1
surface_76_2 0 0 0 0 0 0 1
walk_76_2 0 0 0 0 0 0 1
heart_76_2 0 0 0 0 0 1 0
wake_76_2 0 0 0 0 0 1 0
memory_76_3 0 0 0 0 0 1 0
take_76_3 0 0 0 0 0 1 0
descend_76_4 1 0 0 0 0 0 0
reason_76_4 1 0 0 0 0 0 0
heart_76_4 0 1 0 0 0 0 0
wake_76_4 0 1 0 0 0 0 0
be_76_5 0 0 0 1 0 0 0
spirit_76_5 0 0 0 1 0 0 0
come_76_6 0 0 0 0 0 1 0
memory_76_6 0 0 0 0 0 1 0
give_77_1 0 0 0 1 0 0 1
help_77_1 0 0 0 1 0 0 1
prev_77_1 0 0 0 0 0 1 0
unite_77_1 0 0 0 0 0 1 0
catch_77_2 0 1 0 0 0 0 0
surface_77_2 0 1 0 0 0 0 0
implement_77_2 0 0 0 1 0 0 0
prev_77_2 0 0 0 1 0 0 0
prev-help_77_2 0 0 0 1 0 0 0
hear_78_1 0 1 0 1 1 0 0
inside_78_1 0 1 0 0 0 0 0
speech_78_1 0 0 0 1 0 0 0
voice_78_1 0 0 0 0 1 0 0
belief_7_1 0 1 0 0 0 0 0
implement_7_1 0 1 0 0 0 0 0
heart_7_2 0 0 1 0 0 1 0
put_in_7_2 0 0 1 0 0 0 0
put_on_7_2 0 0 1 0 0 0 0
install_7_2 0 0 0 1 0 1 0
spirit_7_2 0 0 0 1 0 0 0
heart_7_3 0 0 0 0 0 1 0
install_7_3 0 0 0 0 0 1 0
collect_82_1 1 0 0 0 0 0 0
defeat_82_1 1 0 0 0 0 0 0
be_able_82_2 0 0 0 1 0 1 0
hand_82_2 0 0 0 1 0 1 0
in_83_1 1 0 0 0 0 0 0
please_83_1 1 0 0 0 0 0 0
soul_83_1 1 0 0 0 0 0 0
do_83_2 0 1 0 0 0 0 0
satisfaction_83_2 0 1 0 0 0 0 0
become_happy_83_2 0 0 1 0 0 0 0
interior_83_2 0 0 1 0 0 0 0
be_nice_83_2 0 0 0 1 1 0 1
stomach.loc_83_2 0 0 0 1 1 0 1
become_beautiful_83_2 0 0 0 0 0 1 0
stomach_83_2 0 0 0 0 0 1 0
in_83_3 1 0 0 0 0 0 0
please_83_3 1 0 0 0 0 0 0
soul_83_3 1 0 0 0 0 0 0
catch_83_3 0 1 0 0 0 0 0
heart_83_3 0 1 0 0 0 0 0
kill_84_1 0 1 0 0 0 0 0
talk_84_1 0 1 0 0 0 0 0
implement_84_1 0 0 0 0 0 1 0
jugement_84_1 0 0 0 0 0 1 0
install_84_2 0 0 0 1 0 1 1
talk_84_2 0 0 0 1 0 0 1
do_84_2 0 0 0 1 0 0 0
implement_84_2 0 0 0 1 0 0 0
chat_84_2 0 0 0 0 0 1 0
descend_85_1 0 0 0 1 0 0 1
mouth_85_1 0 0 0 1 0 0 1
implement_87_1 0 0 0 1 0 0 0
prev2-speech_87_1 0 0 0 1 0 0 0
surface_87_1 0 0 0 0 0 1 0
take_87_1 0 0 0 0 0 1 0
interior_87_1 0 0 0 0 0 1 0
say_87_1 0 0 0 0 0 1 0
speech_87_1 0 0 0 0 0 1 0
heart_88_1 0 1 0 0 0 0 0
implement_88_1 0 1 0 0 0 0 0
catch_88_2 0 0 0 1 0 1 1
love_88_2 0 0 0 1 0 1 1
behind_89_1 0 0 0 0 0 1 0
chase_89_1 0 0 0 0 0 1 0
down_90_1 0 1 0 0 0 0 0
go_90_1 0 1 0 0 0 0 0
ear_91_1 0 0 0 0 1 0 0
stand_91_1 0 0 0 0 1 0 0
ear_91_2 1 1 0 1 0 1 0
prev-stand_91_2 1 0 0 0 0 0 0
leave_91_2 0 1 0 0 0 0 0
install_91_2 0 0 0 1 0 1 0
ear_91_3 0 0 0 0 0 0 1
install_91_3 0 0 0 0 0 0 1
hear_91_3 0 0 0 1 0 0 0
speech_91_3 0 0 0 1 0 0 0
respect_92_1 0 1 0 0 0 0 0
sit_92_1 0 1 0 0 0 0 0
ear_93_1 0 0 0 0 1 0 0
stand_93_1 0 0 0 0 1 0 0
hear_93_2 0 0 0 1 0 0 0
speech_93_2 0 0 0 1 0 1 0
entendre_93_2 0 0 0 0 0 1 0
finish_95_1 0 0 0 1 0 1 0
prev_95_1 0 0 0 1 0 1 0
dream_97_1 0 0 0 1 0 1 1
implement_97_1 0 0 0 1 0 0 1
kill_97_1 0 0 0 0 0 1 0
dream_97_2 0 1 0 0 0 0 0
remove_97_2 0 1 0 0 0 0 0
lie_98_1 0 0 0 0 1 0 0
speech_98_1 0 0 0 0 1 0 0
install_98_2 0 0 0 0 0 1 0
speech_98_2 0 0 0 0 0 1 0
install_98_3 0 0 0 0 0 1 0
speech_98_3 0 0 0 0 0 1 0
put_99_1 0 1 0 0 0 0 0
quarrel_99_1 0 1 0 0 0 0 0
make_99_2 0 0 0 1 0 0 1
work_99_2 0 0 0 0 0 0 1
argument_99_2 0 0 0 1 0 0 0
do_99_2 0 0 0 0 0 1 0
install_99_2 0 0 0 0 0 1 0
speech_99_2 0 0 0 0 0 1 0
impertinence_99_3 0 0 0 0 0 1 0
install_99_3 0 0 0 0 0 1 0

Все примеры, без фильтрации

Code
# Pairwise distances between languages over all stimuli.
# Each form is tagged with its stimulus number, turned into a form-by-language
# 0/1 table, transposed so that languages are the rows, and compared with
# dist(method = "binary"). The result is stored in complex_verbs_distance.
complex_verbs_distance <- analyze_df |>
  mutate(
    # "<form>_<number>" keeps identical glosses from different stimuli apart.
    form = str_c(form, "_", number),
    # Drop the first "_<digit>" found in the language label.
    language = str_remove(language, "_\\d")
  ) |>
  distinct(language, form) |>
  # 1 marks an attested language/form pair; pivot_wider() fills the rest with 0.
  mutate(value = 1) |>
  pivot_wider(
    names_from = language,
    values_from = value,
    values_fill = 0
  ) |>
  column_to_rownames("form") |>
  # dist() compares rows, so languages have to become the rows.
  t() |>
  dist(method = "binary")
Code
# Cluster the languages on complex_verbs_distance (hclust() with its default
# settings), convert the result to a phylo object and draw it top-down.
complex_verbs_distance |>
  hclust() |>
  as.phylo() |>
  (\(tree) {
    # Colour every tip by looking its label up in colors$language.
    tip_colours <- colors$color[match(tree$tip.label, colors$language)]
    plot(
      tree,
      main = "Complex verbs semantics (all stimuli)",
      tip.color = tip_colours,
      direction = "downwards",
      cex = 1.5,
      font = 2
    )
  })()

Code
# Draw the NeighborNet network built from the all-stimuli distances, then add
# the title to the finished plot.
plot(neighborNet(complex_verbs_distance))
title(main = "Complex verbs semantics (all stimuli)")

Конечно, при таком варианте получается много единичных случаев, которые удлиняют палки и в кластеризации, и в нейборнете, однако случаев, когда форма есть во всех семи языках, мало.

Только стимулы, для которых есть все языки

Что, если посмотреть только на те стимулы, для которых во всех языках есть какая-то форма (сложный глагол или нет):

Code
# Pairwise distances between languages, restricted to the stimuli listed in
# to_keep. Apart from the filter, the steps are the same as for the
# all-stimuli distances; the result is stored in complex_verbs_distance_subset.
complex_verbs_distance_subset <- analyze_df |>
  # Take the first run of 1-3 digits that is followed by "_" in `number`
  # and strip the underscore again.
  mutate(old_number = str_remove(str_extract(number, "\\d{1,3}_"), "_")) |>
  filter(old_number %in% to_keep) |>
  mutate(
    # "<form>_<number>" keeps identical glosses from different stimuli apart.
    form = str_c(form, "_", number),
    # Drop the first "_<digit>" found in the language label.
    language = str_remove(language, "_\\d")
  ) |>
  distinct(language, form) |>
  # 1 marks an attested language/form pair; pivot_wider() fills the rest with 0.
  mutate(value = 1) |>
  pivot_wider(
    names_from = language,
    values_from = value,
    values_fill = 0
  ) |>
  column_to_rownames("form") |>
  # dist() compares rows, so languages have to become the rows.
  t() |>
  dist(method = "binary")
Code
# Cluster the languages on complex_verbs_distance_subset (hclust() with its
# default settings), convert the result to a phylo object and draw it top-down.
complex_verbs_distance_subset |>
  hclust() |>
  as.phylo() |>
  (\(tree) {
    # Colour every tip by looking its label up in colors$language.
    tip_colours <- colors$color[match(tree$tip.label, colors$language)]
    plot(
      tree,
      main = "Complex verbs semantics with common stimuli",
      tip.color = tip_colours,
      direction = "downwards",
      cex = 1.5,
      font = 2
    )
  })()

Code
# Draw the NeighborNet network built from the common-stimuli distances, then
# add the title to the finished plot.
plot(neighborNet(complex_verbs_distance_subset))
title(main = "Complex verbs semantics with common stimuli")

Только стимулы, для которых есть все языки, случайная этимология для каждого языка

Code
complex_verbs |> 
  filter(number %in% to_keep) |> 
  group_by(number) |> 
  slice_sample(n = 1) |> 
  pivot_longer(names_to = "language", values_to = "verb_meaning", Guro:Kono) |> 
  filter(!is.na(verb_meaning)) |> 
  select(number, id, language, verb_meaning) |> 
  mutate(verb_meaning = str_remove_all(verb_meaning, "\\(.*?\\)"),
         verb_meaning = str_squish(verb_meaning),
         verb_meaning = str_split(verb_meaning, " ; ")) |>
  unnest_longer(verb_meaning) |> 
  separate_wider_delim(verb_meaning, delim = " ",
                       names = c("p1", "p2", "p3", "p4"),
                       too_few = "align_start") |> 
  filter(!is.na(p2)) |> 
  mutate(n_parts = case_when(!is.na(p4) ~ 4,
                             is.na(p4) & !is.na(p3) ~ 3,
                             is.na(p3) & !is.na(p2) ~ 2)) |> 
  pivot_longer(names_to = "p", values_to = "form", p1:p4) |> 
  mutate(form = case_when(str_detect(form, "\\?{1,3}") ~ NA,
                          TRUE ~ form)) |> 
  na.omit() |> 
  arrange(form, number, id) |> 
  mutate(language = str_c(language, "_", n_parts)) |> 
  select(number, id, language, form) |> 
  arrange(number, id, language, form) |> 
  mutate(form = str_c(form, "_", number),
         language = str_remove(language, "_\\d")) |> 
  distinct(language, form) |> 
  mutate(value = 1) |> 
  pivot_wider(names_from = language, values_from = value, values_fill = 0) |> 
  column_to_rownames("form") |>
  select(-number) |> 
  t() |> 
  dist(method = "binary") |> 
  hclust() |> 
  as.phylo()  %>%
  plot(main = "Complex verbs semantics with random equivalents and common stimuli",
       tip.color = colors$color[match(.$tip.label, colors$language)],
       direction = "downwards",
       cex = 1.5, 
       font = 2)

Code
complex_verbs |> 
  filter(number %in% to_keep) |> 
  group_by(number) |> 
  slice_sample(n = 1) |> 
  pivot_longer(names_to = "language", values_to = "verb_meaning", Guro:Kono) |> 
  filter(!is.na(verb_meaning)) |> 
  select(number, id, language, verb_meaning) |> 
  mutate(verb_meaning = str_remove_all(verb_meaning, "\\(.*?\\)"),
         verb_meaning = str_squish(verb_meaning),
         verb_meaning = str_split(verb_meaning, " ; ")) |>
  unnest_longer(verb_meaning) |> 
  separate_wider_delim(verb_meaning, delim = " ",
                       names = c("p1", "p2", "p3", "p4"),
                       too_few = "align_start") |> 
  filter(!is.na(p2)) |> 
  mutate(n_parts = case_when(!is.na(p4) ~ 4,
                             is.na(p4) & !is.na(p3) ~ 3,
                             is.na(p3) & !is.na(p2) ~ 2)) |> 
  pivot_longer(names_to = "p", values_to = "form", p1:p4) |> 
  mutate(form = case_when(str_detect(form, "\\?{1,3}") ~ NA,
                          TRUE ~ form)) |> 
  na.omit() |> 
  arrange(form, number, id) |> 
  mutate(language = str_c(language, "_", n_parts)) |> 
  select(number, id, language, form) |> 
  arrange(number, id, language, form) |> 
  mutate(form = str_c(form, "_", number),
         language = str_remove(language, "_\\d")) |> 
  distinct(language, form) |> 
  mutate(value = 1) |> 
  pivot_wider(names_from = language, values_from = value, values_fill = 0) |> 
  column_to_rownames("form") |>
  select(-number) |> 
  t() |> 
  dist(method = "binary") |> 
  hclust() |> 
  as.phylo()  %>%
  plot(main = "Complex verbs semantics with random equivalents and common stimuli",
       tip.color = colors$color[match(.$tip.label, colors$language)],
       direction = "downwards",
       cex = 1.5, 
       font = 2)

Code
complex_verbs |> 
  filter(number %in% to_keep) |> 
  group_by(number) |> 
  slice_sample(n = 1) |> 
  pivot_longer(names_to = "language", values_to = "verb_meaning", Guro:Kono) |> 
  filter(!is.na(verb_meaning)) |> 
  select(number, id, language, verb_meaning) |> 
  mutate(verb_meaning = str_remove_all(verb_meaning, "\\(.*?\\)"),
         verb_meaning = str_squish(verb_meaning),
         verb_meaning = str_split(verb_meaning, " ; ")) |>
  unnest_longer(verb_meaning) |> 
  separate_wider_delim(verb_meaning, delim = " ",
                       names = c("p1", "p2", "p3", "p4"),
                       too_few = "align_start") |> 
  filter(!is.na(p2)) |> 
  mutate(n_parts = case_when(!is.na(p4) ~ 4,
                             is.na(p4) & !is.na(p3) ~ 3,
                             is.na(p3) & !is.na(p2) ~ 2)) |> 
  pivot_longer(names_to = "p", values_to = "form", p1:p4) |> 
  mutate(form = case_when(str_detect(form, "\\?{1,3}") ~ NA,
                          TRUE ~ form)) |> 
  na.omit() |> 
  arrange(form, number, id) |> 
  mutate(language = str_c(language, "_", n_parts)) |> 
  select(number, id, language, form) |> 
  arrange(number, id, language, form) |> 
  mutate(form = str_c(form, "_", number),
         language = str_remove(language, "_\\d")) |> 
  distinct(language, form) |> 
  mutate(value = 1) |> 
  pivot_wider(names_from = language, values_from = value, values_fill = 0) |> 
  column_to_rownames("form") |>
  select(-number) |> 
  t() |> 
  dist(method = "binary") |> 
  hclust() |> 
  as.phylo()  %>%
  plot(main = "Complex verbs semantics with random equivalents and common stimuli",
       tip.color = colors$color[match(.$tip.label, colors$language)],
       direction = "downwards",
       cex = 1.5, 
       font = 2)

Code
complex_verbs |> 
  filter(number %in% to_keep) |> 
  group_by(number) |> 
  slice_sample(n = 1) |> 
  pivot_longer(names_to = "language", values_to = "verb_meaning", Guro:Kono) |> 
  filter(!is.na(verb_meaning)) |> 
  select(number, id, language, verb_meaning) |> 
  mutate(verb_meaning = str_remove_all(verb_meaning, "\\(.*?\\)"),
         verb_meaning = str_squish(verb_meaning),
         verb_meaning = str_split(verb_meaning, " ; ")) |>
  unnest_longer(verb_meaning) |> 
  separate_wider_delim(verb_meaning, delim = " ",
                       names = c("p1", "p2", "p3", "p4"),
                       too_few = "align_start") |> 
  filter(!is.na(p2)) |> 
  mutate(n_parts = case_when(!is.na(p4) ~ 4,
                             is.na(p4) & !is.na(p3) ~ 3,
                             is.na(p3) & !is.na(p2) ~ 2)) |> 
  pivot_longer(names_to = "p", values_to = "form", p1:p4) |> 
  mutate(form = case_when(str_detect(form, "\\?{1,3}") ~ NA,
                          TRUE ~ form)) |> 
  na.omit() |> 
  arrange(form, number, id) |> 
  mutate(language = str_c(language, "_", n_parts)) |> 
  select(number, id, language, form) |> 
  arrange(number, id, language, form) |> 
  mutate(form = str_c(form, "_", number),
         language = str_remove(language, "_\\d")) |> 
  distinct(language, form) |> 
  mutate(value = 1) |> 
  pivot_wider(names_from = language, values_from = value, values_fill = 0) |> 
  column_to_rownames("form") |>
  select(-number) |> 
  t() |> 
  dist(method = "binary") |> 
  hclust() |> 
  as.phylo()  %>%
  plot(main = "Complex verbs semantics with random equivalents and common stimuli",
       tip.color = colors$color[match(.$tip.label, colors$language)],
       direction = "downwards",
       cex = 1.5, 
       font = 2)

Code
complex_verbs |> 
  filter(number %in% to_keep) |> 
  group_by(number) |> 
  slice_sample(n = 1) |> 
  pivot_longer(names_to = "language", values_to = "verb_meaning", Guro:Kono) |> 
  filter(!is.na(verb_meaning)) |> 
  select(number, id, language, verb_meaning) |> 
  mutate(verb_meaning = str_remove_all(verb_meaning, "\\(.*?\\)"),
         verb_meaning = str_squish(verb_meaning),
         verb_meaning = str_split(verb_meaning, " ; ")) |>
  unnest_longer(verb_meaning) |> 
  separate_wider_delim(verb_meaning, delim = " ",
                       names = c("p1", "p2", "p3", "p4"),
                       too_few = "align_start") |> 
  filter(!is.na(p2)) |> 
  mutate(n_parts = case_when(!is.na(p4) ~ 4,
                             is.na(p4) & !is.na(p3) ~ 3,
                             is.na(p3) & !is.na(p2) ~ 2)) |> 
  pivot_longer(names_to = "p", values_to = "form", p1:p4) |> 
  mutate(form = case_when(str_detect(form, "\\?{1,3}") ~ NA,
                          TRUE ~ form)) |> 
  na.omit() |> 
  arrange(form, number, id) |> 
  mutate(language = str_c(language, "_", n_parts)) |> 
  select(number, id, language, form) |> 
  arrange(number, id, language, form) |> 
  mutate(form = str_c(form, "_", number),
         language = str_remove(language, "_\\d")) |> 
  distinct(language, form) |> 
  mutate(value = 1) |> 
  pivot_wider(names_from = language, values_from = value, values_fill = 0) |> 
  column_to_rownames("form") |>
  select(-number) |> 
  t() |> 
  dist(method = "binary") |> 
  hclust() |> 
  as.phylo()  %>%
  plot(main = "Complex verbs semantics with random equivalents and common stimuli",
       tip.color = colors$color[match(.$tip.label, colors$language)],
       direction = "downwards",
       cex = 1.5, 
       font = 2)

Code
complex_verbs |> 
  filter(number %in% to_keep) |> 
  group_by(number) |> 
  slice_sample(n = 1) |> 
  pivot_longer(names_to = "language", values_to = "verb_meaning", Guro:Kono) |> 
  filter(!is.na(verb_meaning)) |> 
  select(number, id, language, verb_meaning) |> 
  mutate(verb_meaning = str_remove_all(verb_meaning, "\\(.*?\\)"),
         verb_meaning = str_squish(verb_meaning),
         verb_meaning = str_split(verb_meaning, " ; ")) |>
  unnest_longer(verb_meaning) |> 
  separate_wider_delim(verb_meaning, delim = " ",
                       names = c("p1", "p2", "p3", "p4"),
                       too_few = "align_start") |> 
  filter(!is.na(p2)) |> 
  mutate(n_parts = case_when(!is.na(p4) ~ 4,
                             is.na(p4) & !is.na(p3) ~ 3,
                             is.na(p3) & !is.na(p2) ~ 2)) |> 
  pivot_longer(names_to = "p", values_to = "form", p1:p4) |> 
  mutate(form = case_when(str_detect(form, "\\?{1,3}") ~ NA,
                          TRUE ~ form)) |> 
  na.omit() |> 
  arrange(form, number, id) |> 
  mutate(language = str_c(language, "_", n_parts)) |> 
  select(number, id, language, form) |> 
  arrange(number, id, language, form) |> 
  mutate(form = str_c(form, "_", number),
         language = str_remove(language, "_\\d")) |> 
  distinct(language, form) |> 
  mutate(value = 1) |> 
  pivot_wider(names_from = language, values_from = value, values_fill = 0) |> 
  column_to_rownames("form") |>
  select(-number) |> 
  t() |> 
  dist(method = "binary") |> 
  hclust() |> 
  as.phylo()  %>%
  plot(main = "Complex verbs semantics with random equivalents and common stimuli",
       tip.color = colors$color[match(.$tip.label, colors$language)],
       direction = "downwards",
       cex = 1.5, 
       font = 2)

Code
complex_verbs |> 
  filter(number %in% to_keep) |> 
  group_by(number) |> 
  slice_sample(n = 1) |> 
  pivot_longer(names_to = "language", values_to = "verb_meaning", Guro:Kono) |> 
  filter(!is.na(verb_meaning)) |> 
  select(number, id, language, verb_meaning) |> 
  mutate(verb_meaning = str_remove_all(verb_meaning, "\\(.*?\\)"),
         verb_meaning = str_squish(verb_meaning),
         verb_meaning = str_split(verb_meaning, " ; ")) |>
  unnest_longer(verb_meaning) |> 
  separate_wider_delim(verb_meaning, delim = " ",
                       names = c("p1", "p2", "p3", "p4"),
                       too_few = "align_start") |> 
  filter(!is.na(p2)) |> 
  mutate(n_parts = case_when(!is.na(p4) ~ 4,
                             is.na(p4) & !is.na(p3) ~ 3,
                             is.na(p3) & !is.na(p2) ~ 2)) |> 
  pivot_longer(names_to = "p", values_to = "form", p1:p4) |> 
  mutate(form = case_when(str_detect(form, "\\?{1,3}") ~ NA,
                          TRUE ~ form)) |> 
  na.omit() |> 
  arrange(form, number, id) |> 
  mutate(language = str_c(language, "_", n_parts)) |> 
  select(number, id, language, form) |> 
  arrange(number, id, language, form) |> 
  mutate(form = str_c(form, "_", number),
         language = str_remove(language, "_\\d")) |> 
  distinct(language, form) |> 
  mutate(value = 1) |> 
  pivot_wider(names_from = language, values_from = value, values_fill = 0) |> 
  column_to_rownames("form") |>
  select(-number) |> 
  t() |> 
  dist(method = "binary") |> 
  hclust() |> 
  as.phylo()  %>%
  plot(main = "Complex verbs semantics with random equivalents and common stimuli",
       tip.color = colors$color[match(.$tip.label, colors$language)],
       direction = "downwards",
       cex = 1.5, 
       font = 2)

Code
complex_verbs |> 
  filter(number %in% to_keep) |> 
  group_by(number) |> 
  slice_sample(n = 1) |> 
  pivot_longer(names_to = "language", values_to = "verb_meaning", Guro:Kono) |> 
  filter(!is.na(verb_meaning)) |> 
  select(number, id, language, verb_meaning) |> 
  mutate(verb_meaning = str_remove_all(verb_meaning, "\\(.*?\\)"),
         verb_meaning = str_squish(verb_meaning),
         verb_meaning = str_split(verb_meaning, " ; ")) |>
  unnest_longer(verb_meaning) |> 
  separate_wider_delim(verb_meaning, delim = " ",
                       names = c("p1", "p2", "p3", "p4"),
                       too_few = "align_start") |> 
  filter(!is.na(p2)) |> 
  mutate(n_parts = case_when(!is.na(p4) ~ 4,
                             is.na(p4) & !is.na(p3) ~ 3,
                             is.na(p3) & !is.na(p2) ~ 2)) |> 
  pivot_longer(names_to = "p", values_to = "form", p1:p4) |> 
  mutate(form = case_when(str_detect(form, "\\?{1,3}") ~ NA,
                          TRUE ~ form)) |> 
  na.omit() |> 
  arrange(form, number, id) |> 
  mutate(language = str_c(language, "_", n_parts)) |> 
  select(number, id, language, form) |> 
  arrange(number, id, language, form) |> 
  mutate(form = str_c(form, "_", number),
         language = str_remove(language, "_\\d")) |> 
  distinct(language, form) |> 
  mutate(value = 1) |> 
  pivot_wider(names_from = language, values_from = value, values_fill = 0) |> 
  column_to_rownames("form") |>
  select(-number) |> 
  t() |> 
  dist(method = "binary") |> 
  hclust() |> 
  as.phylo()  %>%
  plot(main = "Complex verbs semantics with random equivalents and common stimuli",
       tip.color = colors$color[match(.$tip.label, colors$language)],
       direction = "downwards",
       cex = 1.5, 
       font = 2)

Code
complex_verbs |> 
  filter(number %in% to_keep) |> 
  group_by(number) |> 
  slice_sample(n = 1) |> 
  pivot_longer(names_to = "language", values_to = "verb_meaning", Guro:Kono) |> 
  filter(!is.na(verb_meaning)) |> 
  select(number, id, language, verb_meaning) |> 
  mutate(verb_meaning = str_remove_all(verb_meaning, "\\(.*?\\)"),
         verb_meaning = str_squish(verb_meaning),
         verb_meaning = str_split(verb_meaning, " ; ")) |>
  unnest_longer(verb_meaning) |> 
  separate_wider_delim(verb_meaning, delim = " ",
                       names = c("p1", "p2", "p3", "p4"),
                       too_few = "align_start") |> 
  filter(!is.na(p2)) |> 
  mutate(n_parts = case_when(!is.na(p4) ~ 4,
                             is.na(p4) & !is.na(p3) ~ 3,
                             is.na(p3) & !is.na(p2) ~ 2)) |> 
  pivot_longer(names_to = "p", values_to = "form", p1:p4) |> 
  mutate(form = case_when(str_detect(form, "\\?{1,3}") ~ NA,
                          TRUE ~ form)) |> 
  na.omit() |> 
  arrange(form, number, id) |> 
  mutate(language = str_c(language, "_", n_parts)) |> 
  select(number, id, language, form) |> 
  arrange(number, id, language, form) |> 
  mutate(form = str_c(form, "_", number),
         language = str_remove(language, "_\\d")) |> 
  distinct(language, form) |> 
  mutate(value = 1) |> 
  pivot_wider(names_from = language, values_from = value, values_fill = 0) |> 
  column_to_rownames("form") |>
  select(-number) |> 
  t() |> 
  dist(method = "binary") |> 
  hclust() |> 
  as.phylo()  %>%
  plot(main = "Complex verbs semantics with random equivalents and common stimuli",
       tip.color = colors$color[match(.$tip.label, colors$language)],
       direction = "downwards",
       cex = 1.5, 
       font = 2)

Code
complex_verbs |> 
  filter(number %in% to_keep) |> 
  group_by(number) |> 
  slice_sample(n = 1) |> 
  pivot_longer(names_to = "language", values_to = "verb_meaning", Guro:Kono) |> 
  filter(!is.na(verb_meaning)) |> 
  select(number, id, language, verb_meaning) |> 
  mutate(verb_meaning = str_remove_all(verb_meaning, "\\(.*?\\)"),
         verb_meaning = str_squish(verb_meaning),
         verb_meaning = str_split(verb_meaning, " ; ")) |>
  unnest_longer(verb_meaning) |> 
  separate_wider_delim(verb_meaning, delim = " ",
                       names = c("p1", "p2", "p3", "p4"),
                       too_few = "align_start") |> 
  filter(!is.na(p2)) |> 
  mutate(n_parts = case_when(!is.na(p4) ~ 4,
                             is.na(p4) & !is.na(p3) ~ 3,
                             is.na(p3) & !is.na(p2) ~ 2)) |> 
  pivot_longer(names_to = "p", values_to = "form", p1:p4) |> 
  mutate(form = case_when(str_detect(form, "\\?{1,3}") ~ NA,
                          TRUE ~ form)) |> 
  na.omit() |> 
  arrange(form, number, id) |> 
  mutate(language = str_c(language, "_", n_parts)) |> 
  select(number, id, language, form) |> 
  arrange(number, id, language, form) |> 
  mutate(form = str_c(form, "_", number),
         language = str_remove(language, "_\\d")) |> 
  distinct(language, form) |> 
  mutate(value = 1) |> 
  pivot_wider(names_from = language, values_from = value, values_fill = 0) |> 
  column_to_rownames("form") |>
  select(-number) |> 
  t() |> 
  dist(method = "binary") |> 
  hclust() |> 
  as.phylo()  %>%
  plot(main = "Complex verbs semantics with random equivalents and common stimuli",
       tip.color = colors$color[match(.$tip.label, colors$language)],
       direction = "downwards",
       cex = 1.5, 
       font = 2)

Code
complex_verbs |> 
  filter(number %in% to_keep) |> 
  group_by(number) |> 
  slice_sample(n = 1) |> 
  pivot_longer(names_to = "language", values_to = "verb_meaning", Guro:Kono) |> 
  filter(!is.na(verb_meaning)) |> 
  select(number, id, language, verb_meaning) |> 
  mutate(verb_meaning = str_remove_all(verb_meaning, "\\(.*?\\)"),
         verb_meaning = str_squish(verb_meaning),
         verb_meaning = str_split(verb_meaning, " ; ")) |>
  unnest_longer(verb_meaning) |> 
  separate_wider_delim(verb_meaning, delim = " ",
                       names = c("p1", "p2", "p3", "p4"),
                       too_few = "align_start") |> 
  filter(!is.na(p2)) |> 
  mutate(n_parts = case_when(!is.na(p4) ~ 4,
                             is.na(p4) & !is.na(p3) ~ 3,
                             is.na(p3) & !is.na(p2) ~ 2)) |> 
  pivot_longer(names_to = "p", values_to = "form", p1:p4) |> 
  mutate(form = case_when(str_detect(form, "\\?{1,3}") ~ NA,
                          TRUE ~ form)) |> 
  na.omit() |> 
  arrange(form, number, id) |> 
  mutate(language = str_c(language, "_", n_parts)) |> 
  select(number, id, language, form) |> 
  arrange(number, id, language, form) |> 
  mutate(form = str_c(form, "_", number),
         language = str_remove(language, "_\\d")) |> 
  distinct(language, form) |> 
  mutate(value = 1) |> 
  pivot_wider(names_from = language, values_from = value, values_fill = 0) |> 
  column_to_rownames("form") |>
  select(-number) |> 
  t() |> 
  dist(method = "binary") |> 
  hclust() |> 
  as.phylo()  %>%
  plot(main = "Complex verbs semantics with random equivalents and common stimuli",
       tip.color = colors$color[match(.$tip.label, colors$language)],
       direction = "downwards",
       cex = 1.5, 
       font = 2)

Code
# Dendrogram of languages based on the parts of their complex verbs,
# using one randomly drawn row per stimulus among the stimuli in `to_keep`.
complex_verbs |> 
  filter(number %in% to_keep) |> 
  # one random row per stimulus; no seed is set in this chunk, so the draw
  # (and possibly the tree) can differ between renders
  group_by(number) |> 
  slice_sample(n = 1) |> 
  # wide language columns -> one row per (stimulus, language)
  pivot_longer(cols = Guro:Kono, names_to = "language", values_to = "verb_meaning") |> 
  drop_na(verb_meaning) |> 
  select(number, id, language, verb_meaning) |> 
  # strip parenthesised remarks, normalise whitespace, split " ; " alternatives
  mutate(verb_meaning = verb_meaning |> 
           str_remove_all("\\(.*?\\)") |> 
           str_squish() |> 
           str_split(" ; ")) |>
  unnest_longer(verb_meaning) |> 
  # split every alternative into up to four space-separated parts
  separate_wider_delim(verb_meaning,
                       delim = " ",
                       names = c("p1", "p2", "p3", "p4"),
                       too_few = "align_start") |> 
  # keep only multi-part (complex) verbs
  drop_na(p2) |> 
  # number of filled parts; conditions are checked in order, first match wins
  mutate(n_parts = case_when(!is.na(p4) ~ 4,
                             !is.na(p3) ~ 3,
                             !is.na(p2) ~ 2)) |> 
  pivot_longer(cols = p1:p4, names_to = "p", values_to = "form") |> 
  # parts containing a question mark are treated as missing
  mutate(form = if_else(str_detect(form, "\\?{1,3}"), NA_character_, form)) |> 
  na.omit() |> 
  # the part count is attached to the language only for the sort below and is
  # stripped again right after it
  mutate(language = str_c(language, "_", n_parts)) |> 
  select(number, id, language, form) |> 
  arrange(number, id, language, form) |> 
  # make forms stimulus-specific, so identical parts under different stimuli
  # count as different features
  mutate(form = str_c(form, "_", number),
         language = str_remove(language, "_\\d")) |> 
  distinct(language, form) |> 
  # presence/absence matrix: rows = forms, columns = languages
  mutate(value = 1) |> 
  pivot_wider(names_from = language, values_from = value, values_fill = 0) |> 
  column_to_rownames("form") |>
  # `number` is still present at this point (it is the grouping variable), so
  # it is removed before the matrix is transposed
  select(-number) |> 
  t() |> 
  # binary distance between languages, then hierarchical clustering
  dist(method = "binary") |> 
  hclust() |> 
  as.phylo() |> 
  # tips are coloured by looking each label up in the `colors` table
  (\(tree) plot(tree,
                main = "Complex verbs semantics with random equivalents and common stimuli",
                tip.color = colors$color[match(tree$tip.label, colors$language)],
                direction = "downwards",
                cex = 1.5,
                font = 2))()

Code
# Dendrogram of languages based on the parts of their complex verbs,
# using one randomly drawn row per stimulus among the stimuli in `to_keep`.
complex_verbs |> 
  filter(number %in% to_keep) |> 
  # one random row per stimulus; no seed is set in this chunk, so the draw
  # (and possibly the tree) can differ between renders
  group_by(number) |> 
  slice_sample(n = 1) |> 
  # wide language columns -> one row per (stimulus, language)
  pivot_longer(cols = Guro:Kono, names_to = "language", values_to = "verb_meaning") |> 
  drop_na(verb_meaning) |> 
  select(number, id, language, verb_meaning) |> 
  # strip parenthesised remarks, normalise whitespace, split " ; " alternatives
  mutate(verb_meaning = verb_meaning |> 
           str_remove_all("\\(.*?\\)") |> 
           str_squish() |> 
           str_split(" ; ")) |>
  unnest_longer(verb_meaning) |> 
  # split every alternative into up to four space-separated parts
  separate_wider_delim(verb_meaning,
                       delim = " ",
                       names = c("p1", "p2", "p3", "p4"),
                       too_few = "align_start") |> 
  # keep only multi-part (complex) verbs
  drop_na(p2) |> 
  # number of filled parts; conditions are checked in order, first match wins
  mutate(n_parts = case_when(!is.na(p4) ~ 4,
                             !is.na(p3) ~ 3,
                             !is.na(p2) ~ 2)) |> 
  pivot_longer(cols = p1:p4, names_to = "p", values_to = "form") |> 
  # parts containing a question mark are treated as missing
  mutate(form = if_else(str_detect(form, "\\?{1,3}"), NA_character_, form)) |> 
  na.omit() |> 
  # the part count is attached to the language only for the sort below and is
  # stripped again right after it
  mutate(language = str_c(language, "_", n_parts)) |> 
  select(number, id, language, form) |> 
  arrange(number, id, language, form) |> 
  # make forms stimulus-specific, so identical parts under different stimuli
  # count as different features
  mutate(form = str_c(form, "_", number),
         language = str_remove(language, "_\\d")) |> 
  distinct(language, form) |> 
  # presence/absence matrix: rows = forms, columns = languages
  mutate(value = 1) |> 
  pivot_wider(names_from = language, values_from = value, values_fill = 0) |> 
  column_to_rownames("form") |>
  # `number` is still present at this point (it is the grouping variable), so
  # it is removed before the matrix is transposed
  select(-number) |> 
  t() |> 
  # binary distance between languages, then hierarchical clustering
  dist(method = "binary") |> 
  hclust() |> 
  as.phylo() |> 
  # tips are coloured by looking each label up in the `colors` table
  (\(tree) plot(tree,
                main = "Complex verbs semantics with random equivalents and common stimuli",
                tip.color = colors$color[match(tree$tip.label, colors$language)],
                direction = "downwards",
                cex = 1.5,
                font = 2))()

Code
# Dendrogram of languages based on the parts of their complex verbs,
# using one randomly drawn row per stimulus among the stimuli in `to_keep`.
complex_verbs |> 
  filter(number %in% to_keep) |> 
  # one random row per stimulus; no seed is set in this chunk, so the draw
  # (and possibly the tree) can differ between renders
  group_by(number) |> 
  slice_sample(n = 1) |> 
  # wide language columns -> one row per (stimulus, language)
  pivot_longer(cols = Guro:Kono, names_to = "language", values_to = "verb_meaning") |> 
  drop_na(verb_meaning) |> 
  select(number, id, language, verb_meaning) |> 
  # strip parenthesised remarks, normalise whitespace, split " ; " alternatives
  mutate(verb_meaning = verb_meaning |> 
           str_remove_all("\\(.*?\\)") |> 
           str_squish() |> 
           str_split(" ; ")) |>
  unnest_longer(verb_meaning) |> 
  # split every alternative into up to four space-separated parts
  separate_wider_delim(verb_meaning,
                       delim = " ",
                       names = c("p1", "p2", "p3", "p4"),
                       too_few = "align_start") |> 
  # keep only multi-part (complex) verbs
  drop_na(p2) |> 
  # number of filled parts; conditions are checked in order, first match wins
  mutate(n_parts = case_when(!is.na(p4) ~ 4,
                             !is.na(p3) ~ 3,
                             !is.na(p2) ~ 2)) |> 
  pivot_longer(cols = p1:p4, names_to = "p", values_to = "form") |> 
  # parts containing a question mark are treated as missing
  mutate(form = if_else(str_detect(form, "\\?{1,3}"), NA_character_, form)) |> 
  na.omit() |> 
  # the part count is attached to the language only for the sort below and is
  # stripped again right after it
  mutate(language = str_c(language, "_", n_parts)) |> 
  select(number, id, language, form) |> 
  arrange(number, id, language, form) |> 
  # make forms stimulus-specific, so identical parts under different stimuli
  # count as different features
  mutate(form = str_c(form, "_", number),
         language = str_remove(language, "_\\d")) |> 
  distinct(language, form) |> 
  # presence/absence matrix: rows = forms, columns = languages
  mutate(value = 1) |> 
  pivot_wider(names_from = language, values_from = value, values_fill = 0) |> 
  column_to_rownames("form") |>
  # `number` is still present at this point (it is the grouping variable), so
  # it is removed before the matrix is transposed
  select(-number) |> 
  t() |> 
  # binary distance between languages, then hierarchical clustering
  dist(method = "binary") |> 
  hclust() |> 
  as.phylo() |> 
  # tips are coloured by looking each label up in the `colors` table
  (\(tree) plot(tree,
                main = "Complex verbs semantics with random equivalents and common stimuli",
                tip.color = colors$color[match(tree$tip.label, colors$language)],
                direction = "downwards",
                cex = 1.5,
                font = 2))()

Code
library(lingtypology)
# Load the map sheet and derive a language name from each Glottocode.
for_map <- readxl::read_xlsx("../for_map.xlsx") |> 
  mutate(language_name = lang.gltc(glottocode))

# Two map.feature() layers on one map: first black markers of width 6
# (presumably acting as an outline -- confirm), then, through `pipe.data`,
# markers in each language's own colour with permanently visible labels.
map.feature(
  languages = for_map$language_name,
  latitude = for_map$latitude,
  longitude = for_map$longitude,
  color = "black",
  width = 6
) |> 
  map.feature(
    languages = for_map$language_name,
    latitude = for_map$latitude,
    longitude = for_map$longitude,
    features = for_map$color,
    color = for_map$color,
    legend = FALSE,
    tile = "Esri.WorldGrayCanvas",
    label = for_map$language,
    label.position = "bottom",
    label.hide = FALSE,
    minimap = TRUE,
    pipe.data = _
  )

Construction frequency

МК: я предлагаю сделать для статьи простую таблицу, где будут сводные данные по всем языкам и только самые распространенные типы, а в приложении более полную таблицу с конкретными языками.

Code
# Faceted bar chart: for every language, the construction types with the
# highest counts among the stimuli in `to_keep` (first ten rows per language
# after sorting by descending count).
df |> 
  filter(number %in% to_keep) |> 
  # display name with a space instead of the underscore
  mutate(language = case_match(language,
                               "Dan_Gweetaa" ~ "Dan Gweetaa",
                               .default = language)) |> 
  count(language, construction_type) |> 
  group_by(language) |> 
  arrange(desc(n)) |> 
  slice_head(n = 10) |>
  # reorder_within() + scale_y_reordered() sort the bars inside each facet
  mutate(construction_type = tidytext::reorder_within(construction_type, n, language)) |> 
  ggplot(aes(x = n, y = construction_type)) +
  geom_col() +
  facet_wrap(~language, scales = "free") +
  tidytext::scale_y_reordered() +
  labs(x = "", y = "") +
  theme_minimal()

Caption: Top 10 constructions by frequency.

Missed adpositions

Список контекстов, где есть послелог, но нет этимологии:

Code
# Rows whose construction type contains "Ad" but that have no adposition
# recorded, listed by language and stimulus number.
df |> 
  filter(is.na(adposition) & str_detect(construction_type, "Ad")) |> 
  arrange(language, number)
number construction construction_type adposition verb verb_meaning language

New construction type

Давай будем делать так. Давай сделаем новую табличку с construction types. X Aux Y V, Y Aux X V, X Aux Y N V, Y Aux X N V запишем просто как transitive; X Aux V loc запишем как NOM_loc; во всех примерах с X Aux V Y Ad, Y Aux V X Ad, X Aux N V Y Ad, Y Aux N V X Ad, X Aux Xrefl N V Y Ad, Y Aux Yrefl N V X Ad заменим Ad на этимологическую форму (это реально сделать? Достаточно просто составить список на базе таблицы с этимологией послелогов и надеяться, что там нет двух одинаковых форм в разных языках с разной этимологией). X Aux V Y Ad, X Aux N V Y Ad и X Aux Xrefl N V Y Ad станут NOM_этимологическая форма послелога, Y Aux V X Ad, Y Aux N V X Ad и Y Aux Yrefl N V X Ad станут этимологическая форма послелога_NOM. Остальные выкинем. Сколько, кстати, выкинется? И посмотрим, какая кластеризация выйдет (применим те же методы: все стимулы, общие, общие и рандомные). Если она будет иметь еще меньше смысла для language specialists, тогда наша более подробная классификация будет оправдана.

Code
# Wide sheet: per language one column with the adposition(s) and one
# "<language> etym" column with the matching etymology label(s).
et <- readxl::read_xlsx("../GM_merged_wide_adpositions_MK_0926.xlsx")

# Long lookup table with one row per (stimulus, language, adposition) and its
# etymology label.
adposition_etymology <- et |> 
  # glue form and etymology into one cell so both survive the pivot together
  # (note that the Dan_Gweetaa column is paired with `Dan etym`)
  mutate(Guro = str_c(Guro, `Guro etym`, sep = " ||| "),
         Looma = str_c(Looma, `Looma etym`, sep = " ||| "),
         Mano = str_c(Mano, `Mano etym`, sep = " ||| "),
         Dan_Gweetaa = str_c(Dan_Gweetaa, `Dan etym`, sep = " ||| "),
         Kono = str_c(Kono, `Kono etym`, sep = " ||| "),
         Kpelle = str_c(Kpelle, `Kpelle etym`, sep = " ||| "),
         Bamana = str_c(Bamana, `Bamana etym`, sep = " ||| ")) |> 
  select(number, predicate_eng, construction_type,
         Guro, Looma, Mano, Dan_Gweetaa, Kono, Kpelle, Bamana) |> 
  pivot_longer(cols = Guro:Bamana, names_to = "language", values_to = "value") |> 
  # undo the gluing: form on the left, etymology on the right
  separate_wider_delim(value,
                       delim = " ||| ",
                       names = c("adposition", "adposition_etymology")) |> 
  # cells may hold several "; "-separated entries; unnest both columns in parallel
  mutate(across(c(adposition, adposition_etymology), \(cell) str_split(cell, "; "))) |> 
  unnest_longer(c(adposition, adposition_etymology)) |> 
  na.omit()

# Coarser construction labels: the "Ad" slot is replaced by the adposition's
# etymology label and the most common patterns are collapsed into a few classes.
df |> 
  filter(number %in% to_keep) |> 
  # no `by` is given, so the join uses every column the two tables share
  left_join(adposition_etymology) |> 
  mutate(
    # first substitute the etymology label for "Ad" ...
    construction_type_new = str_replace(construction_type, "Ad", adposition_etymology),
    # ... then collapse; conditions are checked top to bottom, first match wins.
    # The SBJ_ branches look at the *original* construction type: patterns
    # starting with "X " put SBJ first, patterns starting with "Y " put it last.
    construction_type_new = case_when(
      construction_type_new %in% c("X Aux Y V",
                                   "Y Aux X V",
                                   "X Aux N V",
                                   "X Aux Y N V",
                                   "Y Aux X N V") ~ "transitive",
      construction_type_new == "X Aux V loc" ~ "SBJ_loc",
      construction_type_new == "X be Y" ~ "SBJ_no_postposition",
      !is.na(adposition_etymology) & str_detect(construction_type, "^X ") ~ str_c("SBJ_", adposition_etymology),
      !is.na(adposition_etymology) & str_detect(construction_type, "^Y ") ~ str_c(adposition_etymology, "_SBJ"),
      .default = construction_type_new
    )
  ) |> 
  select(number, language, adposition, construction_type, construction_type_new) |> 
  arrange(construction_type_new, number, language) |> 
  # drop labels containing "0" (presumably the marker for "no etymology" -- confirm)
  filter(str_detect(construction_type_new, "0", negate = TRUE))
number language adposition construction_type construction_type_new
1 Dan_Gweetaa bhȁ N be X Y Ad N be X Y postposition-on-1-ma
60 Dan_Gweetaa zɯ̏ X Aux V Y Ad SBJ_around-zi
60 Guro zì X Aux V Y Ad SBJ_around-zi
89 Kpelle púlû X be Y Ad SBJ_back-1-pulu
89 Kpelle púlû X Aux V Y Ad SBJ_back-1-pulu
89 Looma pòlù X Aux V Y Ad SBJ_back-1-pulu
29 Bamana kɔ́ X Aux V Y Ad SBJ_back-3-xo
7 Dan_Gweetaa dhi̋ɤ X Aux N V Y Ad SBJ_before-dhie
89 Guro zuo X Aux V Y Ad SBJ_bottom-1-ju
3 Bamana ɲɛ́ X Aux V Y Ad SBJ_eye-nya
12 Dan_Gweetaa loc X Aux V loc SBJ_loc
12 Guro loc X Aux V loc SBJ_loc
12 Guro loc X Aux V loc SBJ_loc
12 Kono loc X Aux V loc SBJ_loc
12 Kpelle loc X Aux V loc SBJ_loc
12 Looma loc X Aux V loc SBJ_loc
12 Mano loc X Aux V loc SBJ_loc
12 Mano loc X Aux V loc SBJ_loc
14 Dan_Gweetaa loc X Aux V loc SBJ_loc
14 Guro loc X Aux V loc SBJ_loc
14 Kono loc X Aux V loc SBJ_loc
14 Kpelle loc X Aux V loc SBJ_loc
14 Kpelle loc X Aux V loc SBJ_loc
14 Looma loc X Aux V loc SBJ_loc
14 Mano loc X Aux V loc SBJ_loc
14 Mano loc X Aux V loc SBJ_loc
74 Dan_Gweetaa loc X Aux V loc SBJ_loc
74 Guro loc X Aux V loc SBJ_loc
74 Kono loc X Aux V loc SBJ_loc
74 Kpelle loc X Aux V loc SBJ_loc
74 Looma loc X Aux V loc SBJ_loc
74 Mano loc X Aux V loc SBJ_loc
74 Mano loc X Aux V loc SBJ_loc
75 Looma loc X Aux V loc SBJ_loc
79 Dan_Gweetaa loc X Aux V loc SBJ_loc
79 Guro loc X Aux V loc SBJ_loc
79 Kono loc X Aux V loc SBJ_loc
79 Kpelle loc X Aux V loc SBJ_loc
79 Looma loc X Aux V loc SBJ_loc
79 Mano loc X Aux V loc SBJ_loc
90 Dan_Gweetaa loc X Aux V loc SBJ_loc
90 Guro loc X Aux V loc SBJ_loc
90 Kono loc X Aux V loc SBJ_loc
90 Kpelle loc X Aux V loc SBJ_loc
90 Kpelle loc X Aux V loc SBJ_loc
90 Looma loc X Aux V loc SBJ_loc
90 Mano loc X Aux V loc SBJ_loc
100 Dan_Gweetaa NA X be Y SBJ_no_postposition
100 Guro NA X be Y SBJ_no_postposition
89 Dan_Gweetaa kèŋ̏ X Aux V Y Ad SBJ_occiput-3-kee
3 Guro leè X Aux V Y Ad SBJ_palm of hand-degere
7 Bamana lá X V Aux Y Ad SBJ_postposition at-la
19 Bamana lá X Aux V Y Ad SBJ_postposition at-la
22 Bamana lá X Aux V Y Ad SBJ_postposition at-la
25 Bamana lá X Aux V Y Ad SBJ_postposition at-la
25 Bamana lá X Aux Xrefl V Y Ad SBJ_postposition at-la
54 Bamana lá X Aux V Y Ad SBJ_postposition at-la
74 Bamana lá X Aux V Y Ad SBJ_postposition at-la
89 Bamana lá X Aux V Y Ad SBJ_postposition at-la
89 Bamana kɔ́ X Aux V Y Ad SBJ_postposition at-la
89 Bamana nɔ̀fɛ̀ X Aux V Y Ad SBJ_postposition at-la
89 Bamana kàn X Aux V Y Ad SBJ_postposition at-la
90 Bamana lá X Aux V Y Ad SBJ_postposition at-la
118 Bamana lá X Aux V Y Ad SBJ_postposition at-la
3 Looma bà X Aux V Y Ad SBJ_postposition by-ba
6 Mano bà X Aux V Y Ad SBJ_postposition by-ba
23 Guro và X Aux N V Y Ad SBJ_postposition by-ba
77 Guro và X Aux V Y Ad SBJ_postposition by-ba
77 Looma bà X Aux V Y Ad SBJ_postposition by-ba
81 Looma bà X Aux V Y Ad SBJ_postposition by-ba
99 Guro và X Aux N V Y Ad SBJ_postposition by-ba
101 Guro và X Aux N V Y Ad SBJ_postposition by-ba
121 Guro và X be Y Ad SBJ_postposition by-ba
76 Looma sù X Aux N V Y Ad SBJ_postposition in-2-sun
25 Dan_Gweetaa gɯ́ X Aux N V Y Ad SBJ_postposition in-3-gi
11 Guro ya̰ X Aux V Y Ad SBJ_postposition with-2-yan
17 Bamana yé X Aux Z V Y Ad SBJ_postposition with-2-yan
54 Guro ya̰ X Aux V Y Ad SBJ_postposition with-2-yan
80 Guro ya̰ X Aux V Y Ad SBJ_postposition with-2-yan
94 Guro ya̰ X Aux V Y Ad SBJ_postposition with-2-yan
99 Guro ya̰ X Aux N V Y Ad SBJ_postposition with-2-yan
3 Mano ká X Aux V Y Ad SBJ_postposition with-3-ga
6 Looma gà X Aux V Y Ad SBJ_postposition with-3-ga
11 Kono X Aux V Y Ad SBJ_postposition with-3-ga
11 Kpelle X Aux V Y Ad SBJ_postposition with-3-ga
23 Dan_Gweetaa ká X Aux N V Y Ad SBJ_postposition with-3-ga
23 Looma gà X Aux V Y Ad SBJ_postposition with-3-ga
35 Dan_Gweetaa ká X Aux V Y Ad SBJ_postposition with-3-ga
39 Mano ká X Aux Xrefl N V Y Ad SBJ_postposition with-3-ga
39 Mano ká X Aux Xrefl N V Y Ad SBJ_postposition with-3-ga
46 Mano ká X Aux N V Y Ad SBJ_postposition with-3-ga
46 Mano ká X Aux V Y Ad SBJ_postposition with-3-ga
54 Dan_Gweetaa ká X Aux V Y Ad SBJ_postposition with-3-ga
54 Looma gà X Aux N V Y Ad SBJ_postposition with-3-ga
76 Mano ká X Aux Xrefl N V Y Ad SBJ_postposition with-3-ga
80 Dan_Gweetaa ká X Aux Xrefl V Y Ad SBJ_postposition with-3-ga
80 Kono X Aux Xrefl V Y Ad SBJ_postposition with-3-ga
80 Kpelle X Aux Xrefl V Y Ad SBJ_postposition with-3-ga
80 Looma gà X Aux Xrefl V Y Ad SBJ_postposition with-3-ga
80 Mano ká X Aux Xrefl V Y Ad SBJ_postposition with-3-ga
80 Mano ká X Aux Xrefl V Y Ad SBJ_postposition with-3-ga
85 Kono X Aux Xrefl N V Y Ad SBJ_postposition with-3-ga
85 Kpelle X Aux Xrefl N V Y Ad SBJ_postposition with-3-ga
85 Mano ká X Aux V Y Ad SBJ_postposition with-3-ga
85 Mano ká X Aux V Y Ad SBJ_postposition with-3-ga
94 Dan_Gweetaa ká X Aux V Y Ad SBJ_postposition with-3-ga
94 Kono X Aux V Y Ad SBJ_postposition with-3-ga
94 Kpelle X Aux V Y Ad SBJ_postposition with-3-ga
94 Mano ká X Aux V Y Ad SBJ_postposition with-3-ga
99 Dan_Gweetaa ká X Aux N V Y Ad SBJ_postposition with-3-ga
100 Kono X be Y Ad SBJ_postposition with-3-ga
100 Kpelle X be Y Ad SBJ_postposition with-3-ga
100 Mano ká X be Y Ad SBJ_postposition with-3-ga
119 Mano ká X Aux V Y Ad SBJ_postposition with-3-ga
121 Mano ká X Aux V Y Ad SBJ_postposition with-3-ga
3 Dan_Gweetaa gɔ̏ X Aux V Y Ad SBJ_postposition-for-1-gon
28 Dan_Gweetaa gɔ̏ X Aux V Y Ad SBJ_postposition-for-1-gon
28 Dan_Gweetaa gɔ̏ X Aux N V Y Ad SBJ_postposition-for-1-gon
57 Dan_Gweetaa gɔ̏ X Aux N V Y Ad SBJ_postposition-for-1-gon
3 Mano lɛ̀ɛ̄ X Aux V Y Ad SBJ_postposition-for-2-len
7 Guro X Aux Xrefl N V Y Ad SBJ_postposition-for-2-len
7 Guro X Aux Xrefl N V Y Ad SBJ_postposition-for-2-len
17 Guro X Aux 3sg V Y Ad SBJ_postposition-for-2-len
17 Mano lɛ̀ɛ̄ X Aux 3sg V Y Ad SBJ_postposition-for-2-len
25 Guro X Aux Xrefl N V Y Ad SBJ_postposition-for-2-len
46 Looma bɛ̀ X Aux V Y Ad SBJ_postposition-for-3-be
58 Looma bɛ̀ X Aux V Y Ad SBJ_postposition-for-3-be
3 Kono mà X Aux V Y Ad SBJ_postposition-on-1-ma
3 Kpelle ɓà X Aux V Y Ad SBJ_postposition-on-1-ma
6 Dan_Gweetaa bhȁ X Aux V Y Ad SBJ_postposition-on-1-ma
6 Guro ɓa̰ X Aux V Y Ad SBJ_postposition-on-1-ma
6 Kono mà X Aux V Y Ad SBJ_postposition-on-1-ma
6 Kpelle ɓà X Aux V Y Ad SBJ_postposition-on-1-ma
7 Kpelle ɓà X Aux Xrefl N V Y Ad SBJ_postposition-on-1-ma
7 Mano mɔ̀ X Aux Xrefl N V Y Ad SBJ_postposition-on-1-ma
9 Dan_Gweetaa bhȁ X Aux V Y Ad SBJ_postposition-on-1-ma
11 Dan_Gweetaa bhȁ X Aux V Y Ad SBJ_postposition-on-1-ma
11 Mano mɔ̀ X Aux V Y Ad SBJ_postposition-on-1-ma
17 Kono mà X Aux V Y Ad SBJ_postposition-on-1-ma
17 Kpelle ɓà X Aux V Y Ad SBJ_postposition-on-1-ma
17 Looma mà X Aux V Y Ad SBJ_postposition-on-1-ma
19 Guro ɓa̰ X Aux V Y Ad SBJ_postposition-on-1-ma
22 Guro ɓa̰ X Aux V Y Ad SBJ_postposition-on-1-ma
22 Kono mà X Aux V Y Ad SBJ_postposition-on-1-ma
22 Kpelle ɓà X Aux Xrefl N V Y Ad SBJ_postposition-on-1-ma
22 Kpelle ɓà X Aux Xrefl N V Y Ad SBJ_postposition-on-1-ma
22 Looma bà X Aux V Y Ad SBJ_postposition-on-1-ma
22 Looma bà X Aux V Y Ad SBJ_postposition-on-1-ma
22 Mano mɔ̀ X Aux Xrefl N V Y Ad SBJ_postposition-on-1-ma
22 Mano mɔ̀ X Aux V Y Ad SBJ_postposition-on-1-ma
22 Mano mɔ̀ X Aux V Y Ad SBJ_postposition-on-1-ma
25 Kono mà X Aux Xrefl N V Y Ad SBJ_postposition-on-1-ma
25 Kpelle ɓà X Aux Xrefl N V Y Ad SBJ_postposition-on-1-ma
25 Looma mà X Aux N V Y Ad SBJ_postposition-on-1-ma
25 Looma sù X Aux N V Y Ad SBJ_postposition-on-1-ma
25 Mano mɔ̀ X Aux Xrefl N V Y Ad SBJ_postposition-on-1-ma
25 Mano mɔ̀ X Aux Xrefl N V Y Ad SBJ_postposition-on-1-ma
29 Kono mà X Aux V Y Ad SBJ_postposition-on-1-ma
29 Kpelle ɓà X Aux V Y Ad SBJ_postposition-on-1-ma
29 Looma mà X Aux V Y Ad SBJ_postposition-on-1-ma
29 Mano mɔ̀ X Aux V Y Ad SBJ_postposition-on-1-ma
29 Mano mɔ̀ X Aux V Y Ad SBJ_postposition-on-1-ma
35 Guro ɓa̰ X Aux V Y Ad SBJ_postposition-on-1-ma
35 Kono mà X Aux V Y Ad SBJ_postposition-on-1-ma
35 Kpelle ɓà X Aux Xrefl N V Y Ad SBJ_postposition-on-1-ma
35 Mano mɔ̀ X Aux Xrefl N V Y Ad SBJ_postposition-on-1-ma
40 Guro ɓa̰ X Aux N V Y Ad SBJ_postposition-on-1-ma
40 Kpelle ɓà X Aux N V Y Ad SBJ_postposition-on-1-ma
41 Guro ɓa̰ X Aux N V Y Ad SBJ_postposition-on-1-ma
46 Guro ɓa̰ X Aux Xrefl N V Y Ad SBJ_postposition-on-1-ma
50 Guro ɓa̰ X Aux Xrefl V Y Ad SBJ_postposition-on-1-ma
53 Bamana kàn X Aux V Y Ad SBJ_postposition-on-1-ma
53 Bamana mà X Aux V Y Ad SBJ_postposition-on-1-ma
53 Kono mà X Aux V Y Ad SBJ_postposition-on-1-ma
53 Kpelle ɓà X Aux V Y Ad SBJ_postposition-on-1-ma
53 Mano mɔ̀ X Aux V Y Ad SBJ_postposition-on-1-ma
55 Dan_Gweetaa bhȁ X Aux V Y Ad SBJ_postposition-on-1-ma
60 Kono mà X Aux V Y Ad SBJ_postposition-on-1-ma
60 Looma mà X Aux V Y Ad SBJ_postposition-on-1-ma
60 Mano mɔ̀ X Aux V Y Ad SBJ_postposition-on-1-ma
75 Dan_Gweetaa bhȁ X Aux V Y Ad SBJ_postposition-on-1-ma
76 Kono mà X Aux Xrefl N V Y Ad SBJ_postposition-on-1-ma
76 Looma mà X Aux N V Y Ad SBJ_postposition-on-1-ma
77 Kpelle ɓà X Aux V Y Ad SBJ_postposition-on-1-ma
77 Mano mɔ̀ X Aux V Y Ad SBJ_postposition-on-1-ma
77 Mano mɔ̀ X Aux N V Y Ad SBJ_postposition-on-1-ma
77 Mano mɔ̀ X Aux V Y Ad SBJ_postposition-on-1-ma
81 Dan_Gweetaa bhȁ X Aux V Y Ad SBJ_postposition-on-1-ma
81 Kono mà X Aux V Y Ad SBJ_postposition-on-1-ma
81 Kpelle ɓà X Aux V Y Ad SBJ_postposition-on-1-ma
81 Kpelle ɓà X Aux V Y Ad SBJ_postposition-on-1-ma
81 Mano mɔ̀ X Aux V Y Ad SBJ_postposition-on-1-ma
91 Dan_Gweetaa bhȁ X Aux Xrefl N V Y Ad SBJ_postposition-on-1-ma
91 Kpelle ɓà X Aux Xrefl N V Y Ad SBJ_postposition-on-1-ma
91 Looma mà X Aux N V Y N Ad SBJ_postposition-on-1-ma
91 Mano mɔ̀ X Aux Xrefl N V Y Ad SBJ_postposition-on-1-ma
93 Looma mà X Aux N V Y Ad SBJ_postposition-on-1-ma
98 Bamana mà X Aux V Y N Ad SBJ_postposition-on-1-ma
98 Dan_Gweetaa bhȁ X Aux V Y Ad SBJ_postposition-on-1-ma
101 Dan_Gweetaa bhȁ X Aux N V Y Ad SBJ_postposition-on-1-ma
101 Kpelle ɓà X Aux N V Y Ad SBJ_postposition-on-1-ma
101 Looma mà X Aux N V Y Ad SBJ_postposition-on-1-ma
101 Mano mɔ̀ X Aux N V Y Ad SBJ_postposition-on-1-ma
110 Looma mà X Aux N V Y Ad SBJ_postposition-on-1-ma
117 Kono mà X Aux Xrefl N V Y Ad SBJ_postposition-on-1-ma
117 Mano mɔ̀ X Aux Xrefl N V Y Ad SBJ_postposition-on-1-ma
117 Mano mɔ̀ X Aux Xrefl N V Y Ad SBJ_postposition-on-1-ma
53 Bamana kàn X Aux V Aux V Y Ad SBJ_postposition-on-2-xan
79 Bamana kàn X Aux V Y Ad SBJ_postposition-on-2-xan
6 Bamana fɛ̀ X V Aux Y Ad SBJ_side-3-fe
38 Bamana bólo X be X Ad SBJ_side-3-fe
38 Bamana fɛ̀ X be X Ad SBJ_side-3-fe
46 Bamana fɛ̀ X Aux Y Ad SBJ_side-3-fe
117 Dan_Gweetaa pi̋ɤ X Aux N V Y Ad SBJ_side-3-fe
121 Bamana fɛ̀ X be Y Ad SBJ_side-3-fe
121 Dan_Gweetaa pi̋ɤ X be Y Ad SBJ_side-3-fe
121 Mano píé X be Y Ad SBJ_side-3-fe
14 Bamana kɔ́nɔ X Aux V Y Ad SBJ_stomach, belly-2-xono
7 Kono lá X Aux V Y Ad SBJ_surface-1-ta
7 Kpelle lá X Aux V Y Ad SBJ_surface-1-ta
7 Mano là X Aux V Y Ad SBJ_surface-1-ta
18 Guro ta X Aux V Y Ad SBJ_surface-1-ta
53 Dan_Gweetaa tȁ X Aux V Y Ad SBJ_surface-1-ta
53 Guro ta X Aux V Y Ad SBJ_surface-1-ta
68 Mano là X Aux V Y Ad SBJ_surface-1-ta
75 Dan_Gweetaa tȁ X Aux V Y Ad SBJ_surface-1-ta
75 Guro ta X Aux V Y Ad SBJ_surface-1-ta
81 Guro ta X Aux V Y Ad SBJ_surface-1-ta
77 Kono pɔ̀ X Aux N V Y Ad SBJ_trace-1-pon
121 Kono pɔ̀ X be Y Ad SBJ_trace-1-pon
121 Kpelle pɔ́ X be Y Ad SBJ_trace-1-pon
117 Bamana kɔ́rɔ X Aux V Y Ad SBJ_under-1-kodo
23 Kpelle NA X+Y Aux N V X+Y Aux N V
23 Mano NA X+Y Aux N V X+Y Aux N V
99 Kono NA X+Y Aux N V X+Y Aux N V
99 Kpelle NA X+Y Aux N V X+Y Aux N V
99 Mano NA X+Y Aux N V X+Y Aux N V
99 Mano NA X+Y Aux N V X+Y Aux N V
98 Looma mà X+Y Aux N V Z Ad X+Y Aux N V Z postposition-on-1-ma
11 Bamana NA X+Y Aux V X+Y Aux V
23 Bamana NA X+Y Aux V X+Y Aux V
23 Kono NA X+Y Aux V X+Y Aux V
23 Kpelle NA X+Y Aux V X+Y Aux V
23 Looma NA X+Y Aux V X+Y Aux V
98 Guro NA X+Y Aux V X+Y Aux V
98 Kpelle NA X+Y Aux V X+Y Aux V
99 Bamana NA X+Y Aux V X+Y Aux V
98 Mano bà X+Y Aux V X+Yrefl Ad X+Y Aux V X+Yrefl postposition by-ba
98 Mano bà X+Y Aux V X+Yrefl Ad X+Y Aux V X+Yrefl postposition by-ba
99 Mano mɔ̀ X+Y Aux V X+Yrefl Ad X+Y Aux V X+Yrefl postposition-on-1-ma
6 Mano bà X+Y Aux V X+Yrefl recp Ad X+Y Aux V X+Yrefl recp postposition by-ba
98 Kono hù X+Y Aux V Z Ad X+Y Aux V Z postposition in-2-sun
98 Mano NA X+Y Aux X+Yrefl N V X+Y Aux X+Yrefl N V
98 Mano NA X+Y N Aux V X+Y N Aux V
67 Guro ya̰ Xinal N Aux N V Y Ad Xinal N Aux N V Y postposition with-2-yan
76 Bamana lá Xinal N Aux V Y Ad Xinal N Aux V Y postposition at-la
83 Guro và Xinal N Aux V Y Ad Xinal N Aux V Y postposition by-ba
117 Guro và Xinal N Aux V Y Ad Xinal N Aux V Y postposition by-ba
29 Dan_Gweetaa ká Xinal N Aux V Y Ad Xinal N Aux V Y postposition with-3-ga
54 Kono Xinal N Aux V Y Ad Xinal N Aux V Y postposition with-3-ga
54 Kpelle Xinal N Aux V Y Ad Xinal N Aux V Y postposition with-3-ga
54 Mano ká Xinal N Aux V Y Ad Xinal N Aux V Y postposition with-3-ga
54 Mano ká Xinal N Aux V Y Ad Xinal N Aux V Y postposition with-3-ga
76 Dan_Gweetaa ká Xinal N Aux V Y Ad Xinal N Aux V Y postposition with-3-ga
118 Looma gà Xinal N Aux V Y Ad Xinal N Aux V Y postposition with-3-ga
118 Mano ká Xinal N Aux V Y Ad Xinal N Aux V Y postposition with-3-ga
3 Mano mɔ̀ Xinal N Aux V Y Ad Xinal N Aux V Y postposition-on-1-ma
7 Mano mɔ̀ Xinal N Aux V Y Ad Xinal N Aux V Y postposition-on-1-ma
29 Guro ɓa̰ Xinal N Aux V Y Ad Xinal N Aux V Y postposition-on-1-ma
29 Mano mɔ̀ Xinal N Aux V Y Ad Xinal N Aux V Y postposition-on-1-ma
57 Guro ɓa̰ Xinal N Aux V Y Ad Xinal N Aux V Y postposition-on-1-ma
83 Guro ɓa̰ Xinal N Aux V Y Ad Xinal N Aux V Y postposition-on-1-ma
83 Kono mà Xinal N Aux V Y Ad Xinal N Aux V Y postposition-on-1-ma
83 Kpelle ɓà Xinal N Aux V Y Ad Xinal N Aux V Y postposition-on-1-ma
83 Looma mà Xinal N Aux V Y Ad Xinal N Aux V Y postposition-on-1-ma
117 Looma mà Xinal N Aux V Y Ad Xinal N Aux V Y postposition-on-1-ma
118 Guro ɓa̰ Xinal N Aux V Y Ad Xinal N Aux V Y postposition-on-1-ma
118 Kpelle ɓà Xinal N Aux V Y Ad Xinal N Aux V Y postposition-on-1-ma
117 Kpelle pɔ́ Xinal N Aux V Y Ad Xinal N Aux V Y trace-1-pon
51 Bamana NA Xinal N V Y Xinal N V Y
51 Mano NA Xinal N be Xinal N be
100 Bamana yé Xinal N be Y Ad Xinal N be Y postposition with-2-yan
51 Kono Xinal N be Y Ad Xinal N be Y postposition with-3-ga
51 Kpelle Xinal N be Y Ad Xinal N be Y postposition with-3-ga
51 Looma gà Xinal N be Y Ad Xinal N be Y postposition with-3-ga
100 Looma gà Xinal N be Y Ad Xinal N be Y postposition with-3-ga
18 Mano mɔ̀ Xinal N be Y Ad Xinal N be Y postposition-on-1-ma
76 Kpelle ɲá Xinal N be Y Ad Xinal N be Y surface-2-ga
1 Dan_Gweetaa NA Xinal Y Aux V Xinal Y Aux V
1 Guro NA Xinal Y Aux V Xinal Y Aux V
1 Looma NA Xinal Y Aux V Xinal Y Aux V
1 Mano NA Xinal Y Aux V Xinal Y Aux V
1 Bamana NA Xinal Y Aux X V Xinal Y Aux X V
1 Dan_Gweetaa NA Xinal Y Aux X V Xinal Y Aux X V
1 Kono NA Xinal Y Aux Xrefl V Xinal Y Aux Xrefl V
1 Kpelle NA Xinal Y Aux Xrefl V Xinal Y Aux Xrefl V
1 Kpelle NA Xinal Y Aux Xrefl V Xinal Y Aux Xrefl V
1 Mano NA Xinal Y Aux Xrefl V Xinal Y Aux Xrefl V
1 Dan_Gweetaa NA Xinal Y N Aux V Xinal Y N Aux V
57 Kpelle ɓà Yinal N Aux V X Ad Yinal N Aux V X postposition-on-1-ma
58 Mano píé Yinal N Aux V X Ad Yinal N Aux V X side-3-fe
25 Mano là Yinal N Aux V X Ad Yinal N Aux V X surface-1-ta
76 Mano là Yinal N Aux V X Ad Yinal N Aux V X surface-1-ta
76 Mano là Yinal N Aux V X Ad Yinal N Aux V X surface-1-ta
46 Dan_Gweetaa NA Yinal N Aux X V Yinal N Aux X V
58 Dan_Gweetaa NA Yinal N Aux X V Yinal N Aux X V
119 Dan_Gweetaa NA Yinal N Aux X V Yinal N Aux X V
121 Dan_Gweetaa NA Yinal N Aux X V Yinal N Aux X V
110 Dan_Gweetaa gɯ́ Yinal N be X N Ad Yinal N be X N postposition in-3-gi
67 Looma bɛ̀ Yinal N be X Ad Yinal N be X postposition-for-3-be
46 Kono mà Yinal N be X Ad Yinal N be X postposition-on-1-ma
46 Kpelle ɓà Yinal N be X Ad Yinal N be X postposition-on-1-ma
46 Mano mɔ̀ Yinal N be X Ad Yinal N be X postposition-on-1-ma
58 Kono mà Yinal N be X Ad Yinal N be X postposition-on-1-ma
67 Dan_Gweetaa bhȁ Yinal N be X Ad Yinal N be X postposition-on-1-ma
67 Kono mà Yinal N be X Ad Yinal N be X postposition-on-1-ma
67 Kpelle ɓà Yinal N be X Ad Yinal N be X postposition-on-1-ma
67 Mano mɔ̀ Yinal N be X Ad Yinal N be X postposition-on-1-ma
119 Kono mà Yinal N be X Ad Yinal N be X postposition-on-1-ma
119 Kpelle ɓà Yinal N be X Ad Yinal N be X postposition-on-1-ma
121 Looma mà Yinal N be X Ad Yinal N be X postposition-on-1-ma
67 Bamana nɔ̀fɛ̀ Yinal N be X Ad Yinal N be X trace-1-pon
57 Kono mà Yinal N be.neg X Ad Yinal N be.neg X postposition-on-1-ma
57 Kpelle ɓà Yinal N be.neg X Ad Yinal N be.neg X postposition-on-1-ma
57 Mano mɔ̀ Yinal N be.neg X Ad Yinal N be.neg X postposition-on-1-ma
51 Mano lɛ̀ɛ̄ Z Aux 3sg V X Ad Y Z Aux 3sg V X postposition-for-2-len Y
1 Dan_Gweetaa loc Z Aux V X Y loc Z Aux V X Y loc
51 Dan_Gweetaa NA Z Aux X V Y Z Aux X V Y
51 Guro NA Z Aux X V Y Z Aux X V Y
51 Dan_Gweetaa NA Z Aux X Y V Z Aux X Y V
103 Bamana kɔ́ Y Aux V X Ad back-3-xo_SBJ
18 Kono yêi Y be X Ad hand, arm-2-je_SBJ
18 Kpelle yêi Y be X Ad hand, arm-2-je_SBJ
38 Kono yêi Y be X Ad hand, arm-2-je_SBJ
38 Kpelle yêi Y be X Ad hand, arm-2-je_SBJ
38 Guro leè Y be X Ad palm of hand-degere_SBJ
2 Bamana lá Y be X Ad postposition at-la_SBJ
57 Mano gé Y be X Ad postposition in-3-gi_SBJ
76 Guro jì Y Aux V X Ad postposition in-3-gi_SBJ
58 Bamana yé Y Aux V X Ad postposition with-2-yan_SBJ
119 Bamana yé Y Aux V X Ad postposition with-2-yan_SBJ
1 Dan_Gweetaa ká Y Aux V X Ad postposition with-3-ga_SBJ
118 Dan_Gweetaa ká Y Aux V X N Ad N Ad postposition with-3-ga_SBJ
38 Dan_Gweetaa gɔ̏ Y be X Ad postposition-for-1-gon_SBJ
58 Guro Y Aux V X Ad postposition-for-2-len_SBJ
58 Mano lɛ̀ɛ̄ Y Aux V X Ad postposition-for-2-len_SBJ
119 Guro Y Aux V X Ad postposition-for-2-len_SBJ
2 Dan_Gweetaa bhȁ Y Aux V X Ad postposition-on-1-ma_SBJ
2 Guro ɓa̰ Y be X Ad postposition-on-1-ma_SBJ
2 Kono mà Y be X Ad postposition-on-1-ma_SBJ
2 Kpelle ɓà Y be X Ad postposition-on-1-ma_SBJ
2 Looma mà Y be X Ad postposition-on-1-ma_SBJ
2 Mano mɔ̀ Y be X Ad postposition-on-1-ma_SBJ
118 Guro ɓa̰ Y Aux X N V Xrefl Ad postposition-on-1-ma_SBJ
2 Bamana NA Y Aux X V transitive
2 Dan_Gweetaa bhȁ Y Aux X V transitive
4 Bamana NA X Aux Y V transitive
4 Dan_Gweetaa NA X Aux Y V transitive
4 Guro NA X Aux Y V transitive
4 Kono NA X Aux Y V transitive
4 Kpelle NA X Aux Y V transitive
4 Looma NA X Aux Y V transitive
4 Mano NA X Aux Y V transitive
8 Bamana NA X Aux Y V transitive
8 Dan_Gweetaa NA X Aux Y V transitive
8 Guro NA X Aux Y V transitive
8 Kono NA X Aux Y V transitive
8 Kpelle NA X Aux Y V transitive
8 Looma NA X Aux Y V transitive
8 Mano NA X Aux Y V transitive
9 Bamana NA X Aux Y V transitive
9 Dan_Gweetaa NA X Aux Y V transitive
9 Guro NA X Aux Y V transitive
9 Kono NA X Aux Y V transitive
9 Kpelle NA X Aux Y V transitive
9 Looma NA X Aux Y V transitive
9 Mano NA X Aux Y V transitive
11 Bamana NA X Aux Y V transitive
11 Dan_Gweetaa NA X Aux Y V transitive
11 Looma NA X Aux Y V transitive
16 Bamana NA X Aux Y V transitive
16 Dan_Gweetaa NA X Aux Y V transitive
16 Guro NA X Aux Y V transitive
16 Kono NA X Aux Y V transitive
16 Kpelle NA X Aux Y V transitive
16 Kpelle NA X Aux Y N V transitive
16 Kpelle NA X Aux Y V transitive
16 Looma NA X Aux Y V transitive
16 Mano NA X Aux Y V transitive
16 Mano NA X Aux Y V transitive
18 Dan_Gweetaa NA X Aux Y V transitive
18 Guro NA X Aux Y V transitive
18 Looma NA X Aux Y V transitive
19 Dan_Gweetaa NA X Aux Y V transitive
19 Kono NA X Aux Y V transitive
19 Kpelle NA X Aux Y V transitive
19 Looma NA X Aux Y V transitive
19 Mano NA X Aux Y V transitive
23 Bamana NA X Aux Y V transitive
26 Bamana NA X Aux Y V transitive
26 Dan_Gweetaa NA X Aux Y V transitive
26 Guro NA X Aux Y V transitive
26 Kono NA X Aux Y V transitive
26 Kpelle NA X Aux Y V transitive
26 Looma NA X Aux Y V transitive
26 Mano NA X Aux Y V transitive
27 Bamana NA X Aux Y V transitive
27 Dan_Gweetaa NA X Aux Y V transitive
27 Guro NA X Aux Y V transitive
27 Kono NA X Aux Y V transitive
27 Kpelle NA X Aux Y V transitive
27 Looma NA X Aux Y V transitive
27 Mano NA X Aux Y V transitive
28 Bamana NA X Aux Y V transitive
28 Bamana NA X Aux Y V transitive
28 Guro NA X Aux Y V transitive
28 Guro NA X Aux Y N V transitive
28 Kono NA X Aux Y N V transitive
28 Kpelle NA X Aux Y N V transitive
28 Looma NA X Aux Y V transitive
28 Mano NA X Aux Y N V transitive
29 Mano NA X Aux Y V transitive
31 Bamana NA X Aux Y V transitive
31 Dan_Gweetaa NA X Aux Y V transitive
31 Guro NA X Aux Y V transitive
31 Kono NA X Aux Y V transitive
31 Kpelle NA X Aux Y V transitive
31 Looma NA X Aux Y V transitive
31 Mano NA X Aux Y N V transitive
32 Bamana NA X Aux Y V transitive
32 Dan_Gweetaa NA X Aux Y V transitive
32 Guro NA X Aux Y V transitive
32 Kono NA X Aux Y V transitive
32 Kpelle NA X Aux Y V transitive
32 Looma NA X Aux Y V transitive
32 Mano NA X Aux Y V transitive
33 Bamana NA X Aux Y V transitive
33 Dan_Gweetaa NA X Aux Y V transitive
33 Guro NA X Aux Y V transitive
33 Kono NA X Aux Y V transitive
33 Kpelle NA X Aux Y V transitive
33 Looma NA X Aux Y V transitive
33 Mano NA X Aux Y V transitive
34 Bamana NA X Aux Y V transitive
34 Dan_Gweetaa NA X Aux Y V transitive
34 Guro NA X Aux Y V transitive
34 Kono NA X Aux Y V transitive
34 Kpelle NA X Aux Y V transitive
34 Kpelle NA X Aux Y V transitive
34 Looma NA X Aux Y V transitive
34 Mano NA X Aux Y V transitive
34 Mano NA X Aux Y V transitive
34 Mano NA X Aux Y V transitive
35 Bamana NA X Aux Y V transitive
35 Kpelle NA X Aux Y N V transitive
35 Looma NA X Aux Y N V transitive
35 Mano NA X Aux Y N V transitive
36 Bamana NA X Aux Y V transitive
36 Dan_Gweetaa NA X Aux Y N V transitive
36 Guro NA X Aux Y V transitive
36 Kono NA X Aux Y V transitive
36 Kpelle NA X Aux Y V transitive
36 Kpelle NA X Aux Y V transitive
36 Looma NA X Aux Y V transitive
36 Mano NA X Aux Y V transitive
36 Mano NA X Aux Y N V transitive
39 Bamana NA X Aux Y V transitive
39 Dan_Gweetaa NA X Aux Y V transitive
39 Guro NA X Aux Y V transitive
39 Kono NA X Aux Y V transitive
39 Kpelle NA X Aux Y V transitive
39 Looma NA X Aux Y V transitive
39 Mano NA X Aux Y V transitive
40 Bamana NA X Aux Y V transitive
40 Bamana NA X Aux Y V transitive
40 Dan_Gweetaa NA X Aux Y V transitive
40 Kono NA X Aux Y N V transitive
40 Looma NA X Aux Y V transitive
40 Mano NA X Aux Y V transitive
41 Bamana NA X Aux Y V transitive
41 Dan_Gweetaa NA X Aux Y V transitive
41 Kono NA X Aux Y V transitive
41 Kpelle NA X Aux Y V transitive
41 Looma NA X Aux Y V transitive
41 Mano NA X Aux Y N V transitive
43 Bamana NA X Aux Y V transitive
43 Dan_Gweetaa NA X Aux Y V transitive
43 Guro NA X Aux Y V transitive
43 Kono NA X Aux Y V transitive
43 Kpelle NA X Aux Y V transitive
43 Looma NA X Aux Y V transitive
43 Looma NA X Aux Y V transitive
43 Mano NA X Aux Y V transitive
44 Bamana NA X Aux Y V transitive
44 Dan_Gweetaa NA X Aux Y V transitive
44 Guro NA X Aux Y V transitive
44 Kono NA X Aux Y N V transitive
44 Kpelle NA X Aux Y N V transitive
44 Looma NA X Aux Y V transitive
44 Mano NA X Aux Y V transitive
44 Mano NA X Aux Y V transitive
46 Bamana NA X Aux Y V transitive
46 Mano NA X Aux Y N V transitive
46 Mano NA X Aux Y V transitive
46 Mano NA X Aux Y N V transitive
46 Mano NA X Aux Y N V transitive
49 Bamana NA X Aux Y V transitive
49 Dan_Gweetaa NA X Aux Y V transitive
49 Guro NA X Aux Y V transitive
49 Kono NA X Aux Y N V transitive
49 Kpelle NA X Aux Y V transitive
49 Kpelle NA X Aux Y N V transitive
49 Looma NA X Aux Y V transitive
49 Mano NA X Aux Y V transitive
49 Mano NA X Aux Y N V transitive
50 Bamana NA X Aux Y V transitive
50 Dan_Gweetaa NA X Aux Y V transitive
50 Guro NA X Aux Y V transitive
50 Kono NA X Aux Y V transitive
50 Kpelle NA X Aux Y V transitive
50 Looma NA X Aux Y V transitive
50 Mano NA X Aux Y V transitive
53 Looma NA X Aux Y V transitive
55 Bamana NA X Aux Y V transitive
55 Dan_Gweetaa NA X Aux Y V transitive
55 Guro NA X Aux Y V transitive
55 Kono NA X Aux Y V transitive
55 Kpelle NA X Aux Y V transitive
55 Looma NA X Aux Y V transitive
55 Mano NA X Aux Y V transitive
57 Bamana NA Y Aux X V transitive
57 Looma NA X Aux Y N V transitive
58 Kpelle NA Y Aux X N V transitive
58 Mano NA Y Aux X N V transitive
58 Mano NA Y Aux X N V transitive
60 Bamana NA X Aux Y V transitive
60 Bamana NA X Aux Y V transitive
60 Kpelle NA X Aux Y N V transitive
60 Mano NA X Aux Y N V transitive
63 Bamana NA X Aux Y V transitive
63 Bamana NA X Aux Y N V transitive
63 Dan_Gweetaa NA X Aux Y V transitive
63 Guro NA X Aux Y N V transitive
63 Kono NA X Aux Y N V transitive
63 Kpelle NA X Aux Y N V transitive
63 Looma NA X Aux Y N V transitive
63 Mano NA X Aux Y N V transitive
66 Bamana NA X Aux Y V transitive
66 Guro NA X Aux Y V transitive
66 Kono NA X Aux N V transitive
66 Kpelle NA X Aux N V transitive
66 Looma NA X Aux Y V transitive
66 Mano NA X Aux N V transitive
68 Bamana NA X Aux Y V transitive
68 Dan_Gweetaa NA X Aux Y V transitive
68 Guro NA X Aux Y V transitive
68 Kono NA X Aux Y N V transitive
68 Kpelle NA X Aux Y V transitive
68 Looma NA X Aux Y N V transitive
68 Mano NA X Aux Y N V transitive
68 Mano NA X Aux Y V transitive
69 Bamana NA X Aux Y V transitive
69 Dan_Gweetaa NA X Aux N V transitive
69 Guro NA X Aux Y V transitive
69 Kono NA X Aux N V transitive
69 Kpelle NA X Aux N V transitive
69 Looma NA X Aux N V transitive
69 Mano NA X Aux N V transitive
70 Bamana NA X Aux Y V transitive
70 Dan_Gweetaa NA X Aux Y N V transitive
70 Guro NA X Aux Y V transitive
70 Kono NA X Aux Y V transitive
70 Kpelle NA X Aux Y V transitive
70 Looma NA X Aux Y V transitive
70 Looma NA X Aux Y V transitive
70 Mano NA X Aux Y V transitive
71 Bamana NA X Aux Y V transitive
71 Dan_Gweetaa NA X Aux Y V transitive
71 Guro NA X Aux Y V transitive
71 Kono NA X Aux Y V transitive
71 Kpelle NA X Aux Y V transitive
71 Looma NA X Aux Y V transitive
71 Mano NA X Aux Y V transitive
72 Bamana NA X Aux Y V transitive
72 Dan_Gweetaa NA X Aux Y V transitive
72 Guro NA X Aux Y N V transitive
72 Kono NA X Aux Y V transitive
72 Kpelle NA X Aux Y V transitive
72 Looma NA X Aux Y V transitive
72 Mano NA X Aux Y V transitive
75 Bamana NA X Aux Y N V transitive
75 Guro NA X Aux Y N V transitive
75 Kpelle NA X Aux Y N V transitive
76 Mano NA X Aux Y N V transitive
77 Bamana NA X Aux Y V transitive
77 Dan_Gweetaa NA X Aux Y N V transitive
77 Kpelle NA X Aux Y N V transitive
78 Bamana NA X Aux Y V transitive
78 Dan_Gweetaa NA X Aux Y V transitive
78 Dan_Gweetaa NA X Aux Y N V transitive
78 Guro NA X Aux Y V transitive
78 Kono NA X Aux Y V transitive
78 Kpelle NA X Aux Y N V transitive
78 Looma NA X Aux Y N V transitive
78 Mano NA X Aux Y V transitive
80 Bamana NA Y Aux X V transitive
83 Bamana NA Y Aux X N V transitive
83 Dan_Gweetaa NA Y Aux X N V transitive
85 Bamana NA X Aux Y V transitive
85 Bamana NA X Aux Y V transitive
85 Dan_Gweetaa NA X Aux Y V transitive
85 Guro NA X Aux Y V transitive
85 Kpelle NA X Aux Y V transitive
85 Looma NA X Aux Y V transitive
85 Mano NA X Aux Y V transitive
89 Mano NA X Aux Y V transitive
89 Mano NA X Aux Y N V transitive
90 Dan_Gweetaa NA X Aux Y V transitive
91 Bamana NA X Aux Y V transitive
91 Guro NA X Aux Y V transitive
91 Kono NA X Aux Y N V transitive
91 Kpelle NA X Aux Y N V transitive
93 Bamana NA X Aux Y V transitive
93 Dan_Gweetaa NA X Aux Y V transitive
93 Guro NA X Aux Y V transitive
93 Kono NA X Aux Y V transitive
93 Kpelle NA X Aux Y N V transitive
93 Looma NA X Aux Y V transitive
93 Mano NA X Aux Y N V transitive
93 Mano NA X Aux Y V transitive
94 Bamana NA X Aux Y V transitive
96 Bamana NA X Aux Y V transitive
96 Dan_Gweetaa NA X Aux Y V transitive
96 Guro NA X Aux Y V transitive
96 Kono NA X Aux Y V transitive
96 Kpelle NA X Aux Y V transitive
96 Looma NA X Aux Y V transitive
96 Mano NA X Aux Y V transitive
101 Bamana NA X Aux Y V transitive
101 Kono NA X Aux Y V transitive
102 Bamana NA X Aux Y V transitive
102 Dan_Gweetaa NA X Aux Y V transitive
102 Guro NA X Aux Y V transitive
102 Kono NA X Aux Y N V transitive
102 Kpelle NA X Aux Y V transitive
102 Looma NA X Aux Y V transitive
102 Mano NA X Aux Y V transitive
103 Dan_Gweetaa NA X Aux Y V transitive
103 Guro NA X Aux Y V transitive
103 Guro NA X Aux Y V transitive
103 Kono NA X Aux Y V transitive
103 Kpelle NA X Aux Y V transitive
103 Looma NA X Aux Y V transitive
103 Mano NA X Aux Y V transitive
105 Bamana NA X Aux Y V transitive
105 Dan_Gweetaa NA X Aux Y V transitive
105 Guro NA X Aux Y V transitive
105 Kono NA X Aux Y V transitive
105 Kpelle NA X Aux Y V transitive
105 Looma NA X Aux Y V transitive
105 Mano NA X Aux Y V transitive
106 Bamana NA X Aux Y V transitive
106 Bamana NA X Aux Y V transitive
106 Dan_Gweetaa NA X Aux Y V transitive
106 Guro NA X Aux Y V transitive
106 Kono NA X Aux Y V transitive
106 Kpelle NA X Aux Y V transitive
106 Looma NA X Aux Y V transitive
106 Mano NA X Aux Y V transitive
108 Bamana NA X Aux Y V transitive
108 Dan_Gweetaa NA X Aux Y N V transitive
108 Guro NA X Aux Y N V transitive
108 Kono NA X Aux Y V transitive
108 Kpelle NA X Aux Y V transitive
108 Kpelle NA X Aux Y V transitive
108 Looma NA X Aux Y V transitive
108 Mano NA X Aux Y V transitive
108 Mano NA X Aux Y V transitive
109 Bamana NA X Aux Y V transitive
109 Dan_Gweetaa NA X Aux Y V transitive
109 Guro NA X Aux Y V transitive
109 Kono NA X Aux Y V transitive
109 Kpelle NA X Aux Y V transitive
109 Looma NA X Aux Y V transitive
109 Mano NA X Aux Y V transitive
109 Mano NA X Aux Y N V transitive
110 Bamana NA X Aux Y V transitive
110 Guro NA X Aux Y V transitive
110 Kono NA X Aux Y N V transitive
110 Kpelle NA X Aux Y N V transitive
110 Mano NA X Aux Y N V transitive
110 Mano NA X Aux Y N V transitive
118 Bamana NA Y Aux X V transitive
118 Guro NA Y Aux X N V transitive
Code
library(tidyverse)
# Raw sheet shared by Mano and Kpelle; the Kpelle pipeline further down reads
# from this same object.
mano_kpelle_pre <- readxl::read_xlsx(path = "../say_stimuli.xlsx", sheet = "mano + kpelle")

# Mano: pull the Mano-specific columns under the common column names, tag the
# language, and keep only stimuli that have a construction recorded.
mano <- mano_kpelle_pre |>
  select(number = Number,
         construction = `Mano construction`,
         construction_type = `Mano extended construction type`,
         adposition = `Mano Postposition`,
         verb = `Mano verb`,
         verb_meaning = `Mano Complex verb meaning`) |>
  mutate(language = "Mano") |>
  filter(!is.na(construction))

# Kpelle: same raw sheet as Mano, but with the Kpelle-specific columns mapped
# onto the common column names.
kpelle <- mano_kpelle_pre |>
  select(number = Number,
         construction = `Kpelle construction`,
         construction_type = `Kpelle extended construction type`,
         adposition = `Kpelle postposition`,
         verb = `Kpelle verb`,
         verb_meaning = `Kpelle complex verb meaning`) |>
  mutate(language = "Kpelle") |>
  filter(!is.na(construction))

# The raw sheet is not used again after this point.
rm(mano_kpelle_pre)

# Kono: read the sheet, map its columns onto the common names, tag the
# language, and keep only rows with a construction.
# `Kono construction...8` carries a "...8" position suffix, presumably added by
# readxl's name repair for a duplicated header — confirm against the workbook.
kono_pre <- readxl::read_xlsx(path = "../say_stimuli.xlsx", sheet = "kono")

kono <- kono_pre |>
  select(number = Number,
         construction = `Kono construction...8`,
         construction_type = `Kono extended construction`,
         adposition = `Kono adposition`,
         verb = `Kono verb`,
         verb_meaning = `Kono complex verb`) |>
  mutate(language = "Kono") |>
  filter(!is.na(construction))

rm(kono_pre)

# Looma: read the sheet, map its columns onto the common names, tag the
# language, and keep only rows with a construction.
# I renamed one of the columns to "Looma extended construction"
looma_pre <- readxl::read_xlsx(path = "../say_stimuli.xlsx", sheet = "Looma")

looma <- looma_pre |>
  select(number = Number,
         construction = `Looma construction`,
         construction_type = `Looma extended construction`,
         adposition = `Looma Postposition`,
         verb = `Looma Verb`,
         verb_meaning = `Looma Complex Verb`) |>
  mutate(language = "Looma") |>
  filter(!is.na(construction))

rm(looma_pre)

# Guro: read the sheet, map its columns onto the common names, tag the
# language, and keep only rows with a construction.
# I renamed one of the columns to "Guro extended construction"
guro_pre <- readxl::read_xlsx(path = "../say_stimuli.xlsx", sheet = "Guro")

guro <- guro_pre |>
  select(number = Number,
         construction = `Guro construction`,
         construction_type = `Guro extended construction`,
         adposition = `Guro Postposition`,
         verb = `Guro Verb`,
         verb_meaning = `Guro Complex Verb`) |>
  mutate(language = "Guro") |>
  filter(!is.na(construction))

rm(guro_pre)

# Dan Gweetaa: read the sheet, map its columns onto the common names, and keep
# only rows with a construction. The language is stored as "Dan_Gweetaa"
# (with an underscore).
dan_pre <- readxl::read_xlsx(path = "../say_stimuli.xlsx", sheet = "Dan Gweetaa from Vydrin")

dan <- dan_pre |>
  select(number = Number,
         construction = `Dan construction`,
         construction_type = `Dan extended construction type`,
         adposition = `Dan Postposition`,
         verb = `Dan Verb`,
         verb_meaning = `Dan complex verb meaning`) |>
  mutate(language = "Dan_Gweetaa") |>
  filter(!is.na(construction))

rm(dan_pre)

# Bamana: read the sheet, map its columns onto the common names, tag the
# language, and keep only rows with a construction.
# NOTE(review): unlike the other raw sheets, `bamana_pre` is not removed after
# use and stays in the workspace — confirm whether that is intentional.
bamana_pre <- readxl::read_xlsx(path = "../say_stimuli.xlsx", sheet = "Bambara Vydrin")

bamana <- bamana_pre |>
  select(number = Number,
         construction = `Bamana construction`,
         construction_type = `Bamana extended construction type`,
         adposition = `Bamana Postposition`,
         verb = `Bamana Verb`,
         verb_meaning = `Bamana complex verb meaning`) |>
  mutate(language = "Bamana") |>
  filter(!is.na(construction))

# Stack the seven per-language tables (Guro first), replace every " N N " in
# the construction type with " N ", and save the long table; missing values
# are written as empty cells.
bind_rows(guro, kono, kpelle, looma, mano, dan, bamana) |>
  mutate(
    construction_type = str_replace_all(construction_type,
                                        pattern = " N N ",
                                        replacement = " N ")
  ) |>
  write_csv("../GM_merged.csv", na = "")

# Stimulus metadata: stimulus number, English predicate, stimulus sentence and
# semantic type, taken from the general sheet and given short column names.
stimuli <- readxl::read_xlsx(path = "../say_stimuli.xlsx", sheet = "stimuli_general") |>
  select(number = Number,
         predicate_eng = `English predicate`,
         stimuli = `Stimulus sentence (SAY)`,
         semantic_type = `Semantic types`)

# Wide table of constructions: one row per stimulus x construction type and one
# column per language, holding that language's constructions joined by "; ".
# `n` is the number of rows in the merged table that attest the construction
# type for the stimulus; `id` numbers the construction types within a stimulus.
read_csv("../GM_merged.csv") |>
  filter(!str_detect(construction, "no equivalent"),
         !is.na(construction_type)) |>
  group_by(number) |>
  add_count(construction_type) |>
  # NOTE(review): the grouped reframe() below returns rows ordered by the
  # grouping keys, so this sort appears to have no effect on the final row
  # order or on `id`. If ranking construction types by frequency is intended,
  # the sort has to happen after pivot_wider() — confirm before changing, as
  # it would renumber `id`.
  arrange(number, desc(n)) |>
  select(-adposition) |>
  group_by(number, construction_type, language, n) |>
  reframe(construction = str_c(construction, collapse = "; ")) |>
  pivot_wider(names_from = language, values_from = construction, values_fill = "") |>
  group_by(number) |>
  mutate(id = row_number()) |>
  ungroup() |>
  # Explicit key: avoids relying on whichever column names happen to be shared.
  left_join(stimuli, by = "number") |>
  select(number, semantic_type, predicate_eng, stimuli, id, n, construction_type,
         Guro, Looma, Mano, Dan_Gweetaa, Kono, Kpelle, Bamana) |>
  writexl::write_xlsx("../GM_merged_wide.xlsx")

# Wide table of adpositions: one row per stimulus x construction type and one
# column per language, holding the distinct adpositions joined by "; ".
# `id` numbers the construction types within a stimulus.
read_csv("../GM_merged.csv") |>
  filter(!str_detect(construction, "no equivalent"),
         !is.na(construction_type)) |>
  # distinct() keeps only the listed columns, so no separate column drop is needed.
  distinct(number, construction_type, language, adposition) |>
  group_by(number, construction_type, language) |>
  # NOTE(review): str_c() yields NA for a group if any of its adpositions is NA.
  reframe(adposition = str_c(adposition, collapse = "; ")) |>
  pivot_wider(names_from = language, values_from = adposition, values_fill = "") |>
  group_by(number) |>
  mutate(id = row_number()) |>
  ungroup() |>
  # Explicit key: avoids relying on whichever column names happen to be shared.
  left_join(stimuli, by = "number") |>
  select(number, semantic_type, predicate_eng, stimuli, id, construction_type,
         Guro, Looma, Mano, Dan_Gweetaa, Kono, Kpelle, Bamana) |>
  writexl::write_xlsx("../GM_merged_wide_adpositions.xlsx")

# Wide table of verbs: one row per stimulus x construction type and one column
# per language, holding the distinct verbs joined by "; ".
# `id` numbers the construction types within a stimulus.
read_csv("../GM_merged.csv") |>
  filter(!str_detect(construction, "no equivalent"),
         !is.na(construction_type)) |>
  # distinct() keeps only the listed columns, so no separate column drop is needed.
  distinct(number, construction_type, language, verb) |>
  group_by(number, construction_type, language) |>
  reframe(verb = str_c(verb, collapse = "; ")) |>
  pivot_wider(names_from = language, values_from = verb, values_fill = "") |>
  group_by(number) |>
  mutate(id = row_number()) |>
  ungroup() |>
  # Explicit key: avoids relying on whichever column names happen to be shared.
  left_join(stimuli, by = "number") |>
  select(number, predicate_eng, stimuli, id, construction_type,
         Guro, Looma, Mano, Dan_Gweetaa, Kpelle, Bamana, Kono) |>
  writexl::write_xlsx("../GM_merged_wide_verb.xlsx")

# Wide table of complex-verb meanings: one row per stimulus x construction type
# and one column per language, each cell listing "meaning (verb)" entries
# joined by "; ". Only rows that have a verb meaning are included, so `id`
# numbers just the construction types that remain for each stimulus.
read_csv("../GM_merged.csv") |>
  filter(!str_detect(construction, "no equivalent"),
         !is.na(construction_type),
         !is.na(verb_meaning)) |>
  # distinct() keeps only the listed columns, so no separate column drop is needed.
  distinct(number, construction_type, language, verb, verb_meaning) |>
  # NOTE(review): a missing verb turns the whole label into NA here.
  mutate(verb_meaning = str_c(verb_meaning, " (", verb, ")")) |>
  group_by(number, construction_type, language) |>
  reframe(verb_meaning = str_c(verb_meaning, collapse = "; ")) |>
  pivot_wider(names_from = language, values_from = verb_meaning, values_fill = "") |>
  group_by(number) |>
  mutate(id = row_number()) |>
  ungroup() |>
  # Explicit key: avoids relying on whichever column names happen to be shared.
  left_join(stimuli, by = "number") |>
  select(number, predicate_eng, stimuli, id, construction_type,
         Guro, Looma, Mano, Dan_Gweetaa, Kpelle, Bamana, Kono) |>
  writexl::write_xlsx("../GM_merged_wide_verb_meaning.xlsx")